Skip to content

RefXAS - Reference database for XAS

Created on Mon Mar 23 14:33:49 2020

@authors: Frank Foerste and Sebastian Paripsa ffoerste@physik.tu-berlin.de, paripsa@uni-wuppertal.de

read_data

Bases: object

This class reads XAS data files of different types; .dat and .xdi files are currently supported. Default detection of the data layout by larch is also implemented.

Source code in plugins/read_data.py
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
class read_data(object):
    """
    This class helps to read in different data types, .dat, .xdi are currently
    supported. A support for default detection of data by larch is also
    implemented.
    """

    def __init__(
        self, source="SYNCHROTRON", update_erange="", scan_number=0, verbose=False
    ):
        """
        Initializing the read_data class. This class helps to read in different
        data types for XAS measurements, currently .dat, .xdi, .spec are
        supported. A support for default detection of data by larch is also
        implemented.

        Parameters
        ----------
        source : str, optional
            Type of source. Either SYNCHROTRON or LABORATORY.
            The default is "SYNCHROTRON".
        update_erange: dict, optional
            Updated energy range.
        scan_number : int, str, optional
            Number or name of the scan to check. Important for .mca, .spec and
            .h5 (from ESRF) For other file types this parameter is ignored.
            The default is 0.
        verbose : bool, optional
            Activate or deactivate the verbose mode. The default is False.
        """

        ### give the class a version, x.y.date(YYYYMMDD)
        self.version = "0.1.20230612"
        self.scan_number = scan_number
        self.verbose = verbose
        if self.verbose:
            print("read_data class initialized -- v{}".format(self.version))
        self.source = source
        ### initialize dictionaries with supported beamlines and their specific
        ### keywords by whom the beamline is identified. These are characteristic
        ### for each file and has to be adapted when a change of syntax occurs
        ### in the beamline datasets
        self.supported_beamlines = {
            "SYNCHROTRON": [
                "CATACT KIT",
                "PETRA III Extension Beamline P65",
                "ELETTRA XAFS",
                "SLRI",
                "ESRF BM 23",
                "SOLEIL ROCK",
                "SOLEIL SAMBA",
                "SLS",
                "DELTA",
                "SOLARIS",
            ],
            "LABORATORY": [
                "TU Berlin",
            ],
        }
        self.beamline_key_words = {
            "SYNCHROTRON": {
                "CATACT KIT": [
                    "catexp",
                ],
                "PETRA III Extension Beamline P65": [
                    "PETRA III Extension Beamline P65",
                ],
                "ELETTRA XAFS": ["Project Name:"],
                "SLRI": ["BL8: X-ray Absorption Spectroscopy"],
                "ESRF BM 23": ["#ZapEnergy"],
                "SOLEIL ROCK": ["Synchrotron SOLEIL"],
                "SOLEIL SAMBA": [
                    "# Energy, Theta, XMU, FLUO, REF, FLUO_RAW, I0, I1, I2, I3"
                ],
                "SLS": ["#posX	SAI01-MEAN	SAI02-MEAN"],
                "DELTA": ["# created:"],
                "SOLARIS": ["#C Acquisition started"],
            },
            "LABORATORY": {
                "TU Berlin": ["# Energies_eV"],
            },
        }
        ### initialize the class to default
        self.reset_2_default()
        self.update_erange = update_erange

    def reset_2_default(
        self,
    ):
        """
        Function to set all relevant data to default.
        """
        ### set the beamline to None as undetected
        self.beamline = None
        self.meta_data_dict = {}
        ### the default source type is SYNCHROTRON. This is changed, when data
        ### is read
        self.source = "SYNCHROTRON"

    def process_data(self, data_path):
        """
        This function determines the datafile. If it is .hdf it will open
        the hdf-file with h5py, else it will open the datafile and read out
        the lines and checking for distinct keywords and starting the readout.

        Parameters
        ----------
        data_path : str
            absolute path to the xdi file.
        """
        self.data_path = data_path
        self.data_type = self.data_path.split(".")[-1]
        if self.data_type in ["h5", "hdf", "hdf5"]:
            self.load_hdf()
        elif self.data_type in [".spec"]:
            self.load_specfile()
        else:
            self.read_out_file()

    def read_out_file(
        self,
    ):
        """
        This function reads every line of the file and checks, if any known
        keyword is in the file. When succesful, the beamline is stored and the
        specified load_data function is called.
        """
        ### resetting to default, clear all data
        self.reset_2_default()
        ### use larch to read out data and the header of the file
        self.larch_data = read_ascii(self.data_path)
        self.header = self.larch_data.header
        ### now check the header for keywords for the specific beamlines
        for line in self.header:
            for beamline in self.supported_beamlines[self.source]:
                for keyword in self.beamline_key_words[self.source][beamline]:
                    if keyword in line:
                        self.beamline = beamline
                        break
        ### if a beamline was found, e
        if self.beamline:
            print("beamline found:\t", self.beamline)
            self.extract_header()
            self.load_data()
        else:
            print("no beamline found, going to numpy extraction mode")
            self.data = read_ascii(self.data_path).data[:2]
            ### check unit of energy, if value below 100 it is most likely keV
            ### --> change it to eV by multipying it with 1000
            if self.data[0, 0] < 100:
                self.data[0] *= 1000
            self.meta_data_dict["E_range_min"] = self.data[0, 0]
            self.meta_data_dict["E_range_max"] = self.data[0, -1]
            self.create_raw_plot_base64()

    def extract_header(self, data_path=None):
        """
        Function to exctract data from the header of the given datafile. This
        is of course specific for each beamline and has to be adapted and
        extended if changes in the beamline data occurs or new beamlines to be
        supported. Also until now only a limited set of metadata is read out.
        Namely: ['Facility', 'Beamline', 'Owner', 'Coll.code', 'Acq. mode',
                 'Temperature']

        Parameters
        ----------
        data_path : str
            Path to the loaded data.

        Returns
        -------
        dictionary
            containing all the found meta data

        """
        ### enable header extraction mode
        ### determine beamline
        if data_path:
            self.process_data(data_path=data_path)
        ### fill all the meta data contained in the files provided by the facilities
        ### into the dictionary
        # self.meta_data_dict = {}
        self.meta_data_dict["Header"] = self.header
        self.meta_data_dict["Header_str"] = "\n".join(self.header)
        self.meta_data_dict["Beamline"] = self.beamline
        coll_code = ""

        ### SYNCHROTRON
        if self.beamline == "CATACT KIT":
            self.meta_data_dict["Facility"] = "KIT Light Source"
            for line in self.header:
                line = line.replace("\n", "")
                if "#F" in line:
                    coll_code += line.split()[-1] + " "
                elif "#E" in line:
                    coll_code += line.split()[-1]
                elif "#D" in line:
                    coll_code += line.replace("#D ", "")
                    self.meta_data_dict["Coll.code"] = coll_code
                elif "#C" in line:
                    self.meta_data_dict["Owner"] = line.split()[-1]
            ### no further needed (as for 20230314) meta data in file
        elif self.beamline == "PETRA III Extension Beamline P65":
            self.meta_data_dict["Facility"] = "DESY"
            self.meta_data_dict["Beamline"] = "P65"
            ### no further needed (as for 20230314) meta data in file
        elif self.beamline == "ELETTRA XAFS":
            self.meta_data_dict["Facility"] = "ELETTRA XAFS"
            self.meta_data_dict["Beamline"] = "XAFS"
            for line in self.header:
                line = line.replace("\n", "")
                if "Project Name" in line:
                    self.meta_data_dict["Coll.code"] = line.split()[-1]
            ### no further needed (as for 20230314) meta data in file
        elif self.beamline == "SLRI":
            self.meta_data_dict["Facility"] = "SLRI"
            for line in self.header:
                line = line.replace("\n", "")
                if "#B" in line:
                    self.meta_data_dict["Beamline"] = line.split(":")[0].replace(
                        "#", ""
                    )
                elif "#Transmission" in line:
                    self.meta_data_dict["Acq. mode"] = "Transmission"
            ### no further needed (as for 20230314) meta data in file
        elif self.beamline == "ESRF BM 23":
            self.meta_data_dict["Facility"] = "ESRF"
            ### no further needed (as for 20230314) meta data in file
        elif self.beamline == "SOLEIL ROCK":
            self.meta_data_dict["Facility"] = "SOLEIL"
            for line in self.header:
                line = line.replace("\n", "")
                if "#Sample temperature" in line:
                    try:
                        self.meta_data_dict["Temperature"] = int(line.split("=")[-1])
                    except:
                        pass
            ### no further needed (as for 20230314) meta data in file
        elif self.beamline == "SOLEIL SAMBA":
            self.meta_data_dict["Facility"] = "SOLEIL"
            ### no further meta data in file
        elif self.beamline == "SLS":
            self.meta_data_dict["Facility"] = "SLS"
            self.meta_data_dict["Beamline"] = "SLS SuperXAS"
            ### no further meta data in file
        elif self.beamline == "SOLARIS":
            self.meta_data_dict["Facility"] = "SOLARIS"
            self.meta_data_dict["Beamline"] = "PIRX"  # TODO
            ### no further meta data in file

        ### LABORATORY
        elif self.beamline == "TU Berlin":
            self.meta_data_dict["Facility"] = "TU Berlin"
            ### no further meta data in file

        ### This dict is only for reference
        ### it represents the xafsdb Widget meta data keys, not important for
        ### read out
        dummy_dict = {
            "owner_group": "Owner group",
            "owner": "Owner",
            "contact_email": "Contact email",
            "Abstract": "Abstract",
            "coll_code": "Coll.code",
            "physical_state": "Phys.state",
            "crystal_orientation": "Crystal orientation",
            "temperature": "Temperature",
            "pressure": "Pressure",
            "sample_environment": "Sample environment",
            "general_remarks": "General remarks",
            "facility": "Facility",
            "beamline": "Beamline",
            "aquisition_mode": "Acq. mode",
            "crystals": "Crystals",
            "mirrors": "Mirrors",
            "detectors": "Detectors",
            "element_input": "Element",
            "edge_input": "Edge",
            "max_k_range": "Max k-range",
            "doi": "DOI",
            "EnergyColumn": "Energy Column",
            "I_zeroColumn": "I0 Column",
            "TransmissionColumn": "Transmission Column",
            "MuColumn": "Mu Column",
            "reference": "Reference",
        }
        return self.meta_data_dict

    def load_data(
        self,
    ):
        """
        This function opens the data file and returns the data in the form of
        array([energy, mu]) depending of the found beamline. This has to be
        adapted and extended if changes in the beamline data occurs or new
        beamlines to be supported.

        Returns
        -------
        numpy.array
            array([energy, mu])
        """
        data = self.larch_data.data

        ### SYNCHROTRON
        if self.beamline == "CATACT KIT":
            # self.larch_data.energy = data[0, :]*1000 ### correct
            self.larch_data.energy = data[0, :]
            self.larch_data.I0 = data[6, :]
            self.larch_data.transmission = data[5, :]
            self.larch_data.mu = np.log(
                self.larch_data.transmission / self.larch_data.I0
            )
        elif self.beamline == "PETRA III Extension Beamline P65":
            self.larch_data.energy = data[1, :]
            self.larch_data.I0 = data[12, :]
            self.larch_data.transmission = data[11, :]
            self.larch_data.mu = np.log(
                self.larch_data.transmission / self.larch_data.I0
            )
        elif self.beamline == "ELETTRA XAFS":
            self.larch_data.energy = data[0, :]
            self.larch_data.I0 = data[3, :]
            self.larch_data.transmission = data[2, :]
            self.larch_data.mu = np.log(
                self.larch_data.transmission / self.larch_data.I0
            )
        elif self.beamline == "SLRI":
            self.larch_data.energy = data[0, :]
            self.larch_data.I0 = data[4, :]
            self.larch_data.transmission = data[3, :]
            self.larch_data.mu = np.log(
                self.larch_data.transmission / self.larch_data.I0
            )
        elif self.beamline == "ESRF BM 23":
            # self.larch_data.energy = data[0, :]*1000 ### correct
            self.larch_data.energy = data[0, :]
            self.larch_data.I0 = data[2, :]
            self.larch_data.transmission = data[1, :]
            self.larch_data.mu = np.log(
                self.larch_data.transmission / self.larch_data.I0
            )
        elif self.beamline == "SOLEIL ROCK":
            self.larch_data.energy = data[0, :]
            self.larch_data.mu = data[1, :]
        elif self.beamline == "SOLEIL SAMBA":
            self.larch_data.energy = data[0, :]
            self.larch_data.I0 = data[8, :]
            self.larch_data.transmission = data[6, :]
            self.larch_data.mu = np.log(
                self.larch_data.transmission / self.larch_data.I0
            )
        elif self.beamline == "SLS":
            # self.larch_data.energy = data[0, :]*1000 ### correct
            self.larch_data.energy = data[0, :]
            self.larch_data.I0 = data[3, :]
            self.larch_data.transmission = data[2, :]
            self.larch_data.mu = np.log(
                self.larch_data.transmission / self.larch_data.I0
            )
        elif self.beamline == "DELTA":
            self.larch_data.energy = data[0, :]
            self.larch_data.mu = data[1, :]
        elif self.beamline == "SOLARIS":
            self.larch_data.mu = self.larch_data.d2 / self.larch_data.sr

        ### LABORATORY
        elif self.beamline == "TU Berlin":
            self.larch_data.energy = data[0, :]
            self.larch_data.mu = data[1, :]

        ### check unit of energy, if value below 100 it is most likely keV
        ### --> change it to eV by multipying it with 1000
        self.larch_data.energy = self.keV2eV(self.larch_data.energy)
        ### convert data to numpy array with [energy, mu]
        self.data = np.array([self.larch_data.energy, self.larch_data.mu])
        ### set e-range
        self.meta_data_dict["E_range_min"] = np.round(self.data[0, 0], decimals=1)
        self.meta_data_dict["E_range_max"] = np.round(self.data[0, -1], decimals=1)
        ### create raw plot
        self.create_raw_plot_base64()

    def load_hdf(
        self,
    ):
        """
        This function reads out h5 files. At the moment (20230614) only ESRF is
        providing their measurement data in an h5 file for the user. So only
        ESRF h5 format is supported. If more facilities are providing h5 files
        either a standard form is given or the facility has to be automatically
        determined and a adapted procedure has to be developed.
        """
        ### first check if a scan number is provided. If not set it to default
        ### '1.1'
        self.header = ""

        if type(self.scan_number) == int:
            self.scan_number = "1.1"
            if self.verbose:
                print("No scan number for hdf5 files provided, setting to default 1.1")
        ### now open the h5 and retrieve the measurement data
        with h5py.File(self.data_path, "r") as f:
            self.h5_energy = f[self.scan_number]["measurement"]["energy_cenc"][()]
            self.h5_mu = f[self.scan_number]["measurement"]["mu_trans"][()]
        ### check if energy is in eV
        self.h5_energy = self.keV2eV(self.h5_energy)
        ### convert data to numpy array with [energy, mu]
        self.data = np.array([self.h5_energy, self.h5_mu])
        ### set e-range
        self.meta_data_dict["E_range_min"] = np.round(self.data[0, 0], decimals=1)
        self.meta_data_dict["E_range_max"] = np.round(self.data[0, -1], decimals=1)
        ### create raw plot
        self.create_raw_plot_base64()

    def load_specfile(
        self,
    ):
        """
        This function reads out spec files using the larch function
        read_specfile. The scan number has to be provided, otherwise the default
        scan number = 0 is selected and loaded.as is a
        """
        self.larch_data = read_specfile(self.data_path, scan=self.scan_number)
        self.larch_data.energy = self.larch_data.Energy
        self.larch_data.mu = (
            self.larch_data.RingCurrent
        )  ###TODO this is not correct!!! No example data for implementation...
        ### check if energy is in eV
        self.larch_data.energy = self.keV2eV(self.larch_data.energy)
        ### convert data to numpy array with [energy, mu]
        self.data = np.array([self.larch_data.energy, self.larch_data.mu])
        ### set e-range
        self.meta_data_dict["E_range_min"] = np.round(self.data[0, 0], decimals=1)
        self.meta_data_dict["E_range_max"] = np.round(self.data[0, -1], decimals=1)
        ### create raw plot
        self.create_raw_plot_base64()

    def create_raw_plot_base64(self, plot_data=False):
        """
        This function plots the read out data and creates a base64 string for
        storing the plot.

        Parameters
        ----------
        plot_data : bool, optional
            To plot or not to plot the data, that is the question. The default
            is False.
        """
        ### cut out e_min and e_max
        # print("I'm update_erange brah:", self.update_erange)
        if self.update_erange:
            try:
                self.E_range_min = float(
                    self.update_erange.get("E_range_min").replace(",", ".")
                )
                self.E_range_max = float(
                    self.update_erange.get("E_range_max").replace(",", ".")
                )
                self.meta_data_dict["E_range_min"] = self.E_range_min
                self.meta_data_dict["E_range_max"] = self.E_range_max
                energy = self.data[0]
                mu = self.data[1]
                mu = mu[self.E_range_min <= energy]
                energy = energy[self.E_range_min <= energy]
                mu = mu[self.E_range_max >= energy]
                energy = energy[self.E_range_max >= energy]
                self.data = np.array([energy, mu])
            except (TypeError, AttributeError):
                print("Update of e-range not possible!")
        ### define figures
        self.fig_raw_data = plt.figure("Preview Raw Data", figsize=(10, 6.25))
        self.fig_raw_data.clf()
        self.ax_raw_data = self.fig_raw_data.subplots()
        self.ax_raw_data.grid()
        major_ticks = np.arange(self.data[0, 0], self.data[0, -1], 100)
        minor_ticks = np.arange(self.data[0, 0], self.data[0, -1], 20)

        ### plot the raw data and label the axes and set the legend in the
        ### lower right corner
        self.ax_raw_data.plot(
            self.data[0], self.data[1], label="Measurement", color="#003161"
        )

        self.ax_raw_data.set_xlabel(r" Energy | eV")
        self.ax_raw_data.set_ylabel(r"$\mu (E)$ | a.u.")
        self.ax_raw_data.set_xlim(self.data[0, 0], self.data[0, -1])
        self.ax_raw_data.legend(loc="lower right")
        self.ax_raw_data.set_xticks(major_ticks)
        self.ax_raw_data.set_xticks(minor_ticks, minor=True)
        ### create a widget and plot the data if specified
        if plot_data:
            self.fig_raw_data.canvas.draw()
            self.fig_raw_data.canvas.flush_events()
        ### transform the matplotlib.figure to a base64 string and store it in
        ### the meta-data dictionary
        buffer = io.BytesIO()
        self.fig_raw_data.savefig(buffer, format="jpeg")
        data = base64.b64encode(buffer.getbuffer()).decode("ascii")
        self.meta_data_dict["PreviewRawData"] = "data:image/jpeg;base64,{}".format(data)

    def keV2eV(self, energy):
        """
        This function checks if the energy loaded is in eV. If it is not in eV
        than the energy is most likely in keV and has to be converted from
        keV to eV by multiplying the given energy array with 1000.

        Parameters
        ----------
        energy : array
            array which contains the energy.

        Returns
        -------
        the energy in eV
        """
        if energy[0] < 100:
            return energy * 1000
        else:
            return energy

    def print_mu(self):
        """
        Important function to draw a cow saying mu.
        """
        print("|¯¯¯¯¯|¯¯¯¯¯|¯¯¯¯¯|¯¯¯¯¯|")
        print("|\|/          (__)      |")
        print("|     `\------(oo)      |")
        print("|       ||    (__) <(mu)|")
        print("|       ||w--||     \|/ |")
        print("|   \|/                 |")
        print("|¯¯¯¯¯|¯¯¯¯¯|¯¯¯¯¯|¯¯¯¯¯|")
        print("QC succesfully performed")

__init__(source='SYNCHROTRON', update_erange='', scan_number=0, verbose=False)

Initializes the read_data class. This class helps to read in different data types for XAS measurements; currently .dat, .xdi and .spec are supported. Default detection of the data layout by larch is also implemented.

Parameters

source : str, optional. Type of source, either SYNCHROTRON or LABORATORY. The default is "SYNCHROTRON". update_erange : dict, optional. Updated energy range. scan_number : int or str, optional. Number or name of the scan to check. Important for .mca, .spec and .h5 (from ESRF) files; for other file types this parameter is ignored. The default is 0. verbose : bool, optional. Activate or deactivate the verbose mode. The default is False.

Source code in plugins/read_data.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def __init__(
    self, source="SYNCHROTRON", update_erange="", scan_number=0, verbose=False
):
    """
    Initialize the read_data class. This class helps to read in different
    data types for XAS measurements, currently .dat, .xdi, .spec are
    supported. A support for default detection of data by larch is also
    implemented.

    Parameters
    ----------
    source : str, optional
        Type of source. Either SYNCHROTRON or LABORATORY.
        The default is "SYNCHROTRON".
    update_erange: dict, optional
        Updated energy range.
    scan_number : int, str, optional
        Number or name of the scan to check. Important for .mca, .spec and
        .h5 (from ESRF) For other file types this parameter is ignored.
        The default is 0.
    verbose : bool, optional
        Activate or deactivate the verbose mode. The default is False.
    """

    ### give the class a version, x.y.date(YYYYMMDD)
    self.version = "0.1.20230612"
    self.scan_number = scan_number
    self.verbose = verbose
    if self.verbose:
        print("read_data class initialized -- v{}".format(self.version))
    ### initialize dictionaries with supported beamlines and their specific
    ### keywords by whom the beamline is identified. These are characteristic
    ### for each file and has to be adapted when a change of syntax occurs
    ### in the beamline datasets
    self.supported_beamlines = {
        "SYNCHROTRON": [
            "CATACT KIT",
            "PETRA III Extension Beamline P65",
            "ELETTRA XAFS",
            "SLRI",
            "ESRF BM 23",
            "SOLEIL ROCK",
            "SOLEIL SAMBA",
            "SLS",
            "DELTA",
            "SOLARIS",
        ],
        "LABORATORY": [
            "TU Berlin",
        ],
    }
    self.beamline_key_words = {
        "SYNCHROTRON": {
            "CATACT KIT": [
                "catexp",
            ],
            "PETRA III Extension Beamline P65": [
                "PETRA III Extension Beamline P65",
            ],
            "ELETTRA XAFS": ["Project Name:"],
            "SLRI": ["BL8: X-ray Absorption Spectroscopy"],
            "ESRF BM 23": ["#ZapEnergy"],
            "SOLEIL ROCK": ["Synchrotron SOLEIL"],
            "SOLEIL SAMBA": [
                "# Energy, Theta, XMU, FLUO, REF, FLUO_RAW, I0, I1, I2, I3"
            ],
            "SLS": ["#posX	SAI01-MEAN	SAI02-MEAN"],
            "DELTA": ["# created:"],
            "SOLARIS": ["#C Acquisition started"],
        },
        "LABORATORY": {
            "TU Berlin": ["# Energies_eV"],
        },
    }
    ### initialize the class to default
    self.reset_2_default()
    ### store the requested source AFTER the defaults were applied, so that
    ### resetting to the defaults cannot discard the source argument
    self.source = source
    self.update_erange = update_erange

create_raw_plot_base64(plot_data=False)

This function plots the read out data and creates a base64 string for storing the plot.

Parameters

plot_data : bool, optional To plot or not to plot the data, that is the question. The default is False.

Source code in plugins/read_data.py
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
def create_raw_plot_base64(self, plot_data=False):
    """
    Plot the currently loaded data and store the plot as a base64 string.

    If ``self.update_erange`` is set, the data are first cut to the energy
    window given by its "E_range_min" / "E_range_max" entries (strings, a
    decimal comma is accepted). The cut is only applied when both limits
    can be parsed and at least one data point lies inside the window;
    otherwise a message is printed and the data are left untouched.

    The resulting JPEG is stored as a data URI in
    ``self.meta_data_dict["PreviewRawData"]``.

    Parameters
    ----------
    plot_data : bool, optional
        If True, the figure canvas is additionally drawn and flushed.
        The default is False.
    """
    ### cut out e_min and e_max
    if self.update_erange:
        try:
            e_min = float(self.update_erange.get("E_range_min").replace(",", "."))
            e_max = float(self.update_erange.get("E_range_max").replace(",", "."))
            energy = self.data[0]
            mu = self.data[1]
            in_window = (e_min <= energy) & (energy <= e_max)
            ### an empty selection would break the plotting below, which
            ### indexes the first and last energy point
            if not in_window.any():
                raise ValueError("no data points inside the requested e-range")
            cut_data = np.array([energy[in_window], mu[in_window]])
        except (TypeError, AttributeError, ValueError):
            ### missing entries (None), non-string entries and malformed
            ### numbers all end up here; nothing has been modified yet
            print("Update of e-range not possible!")
        else:
            ### commit only after everything has been validated
            self.E_range_min = e_min
            self.E_range_max = e_max
            self.meta_data_dict["E_range_min"] = e_min
            self.meta_data_dict["E_range_max"] = e_max
            self.data = cut_data
    ### define figures; the named figure is reused and cleared on every call
    self.fig_raw_data = plt.figure("Preview Raw Data", figsize=(10, 6.25))
    self.fig_raw_data.clf()
    self.ax_raw_data = self.fig_raw_data.subplots()
    self.ax_raw_data.grid()
    major_ticks = np.arange(self.data[0, 0], self.data[0, -1], 100)
    minor_ticks = np.arange(self.data[0, 0], self.data[0, -1], 20)

    ### plot the raw data and label the axes and set the legend in the
    ### lower right corner
    self.ax_raw_data.plot(
        self.data[0], self.data[1], label="Measurement", color="#003161"
    )

    self.ax_raw_data.set_xlabel(r" Energy | eV")
    self.ax_raw_data.set_ylabel(r"$\mu (E)$ | a.u.")
    self.ax_raw_data.set_xlim(self.data[0, 0], self.data[0, -1])
    self.ax_raw_data.legend(loc="lower right")
    self.ax_raw_data.set_xticks(major_ticks)
    self.ax_raw_data.set_xticks(minor_ticks, minor=True)
    ### draw the figure on its canvas if specified
    if plot_data:
        self.fig_raw_data.canvas.draw()
        self.fig_raw_data.canvas.flush_events()
    ### transform the matplotlib.figure to a base64 string and store it in
    ### the meta-data dictionary
    buffer = io.BytesIO()
    self.fig_raw_data.savefig(buffer, format="jpeg")
    data = base64.b64encode(buffer.getbuffer()).decode("ascii")
    self.meta_data_dict["PreviewRawData"] = "data:image/jpeg;base64,{}".format(data)

extract_header(data_path=None)

Function to extract data from the header of the given data file. This is of course specific to each beamline and has to be adapted and extended if the beamline data format changes or new beamlines are to be supported. Also, until now only a limited set of metadata is read out, namely: ['Facility', 'Beamline', 'Owner', 'Coll.code', 'Acq. mode', 'Temperature']

Parameters

data_path : str Path to the loaded data.

Returns

dictionary containing all the found meta data

Source code in plugins/read_data.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
def extract_header(self, data_path=None):
    """
    Function to extract data from the header of the given datafile. This
    is of course specific for each beamline and has to be adapted and
    extended if changes in the beamline data occur or new beamlines are to
    be supported. Until now only a limited set of metadata is read out,
    namely: ['Facility', 'Beamline', 'Owner', 'Coll.code', 'Acq. mode',
             'Temperature']

    The raw header is always stored under "Header" (as given) and
    "Header_str" (joined with newlines).

    Parameters
    ----------
    data_path : str, optional
        Path to the data. If given, the file is processed first via
        ``process_data``; otherwise the already loaded ``self.header`` and
        ``self.beamline`` are used.

    Returns
    -------
    dictionary
        ``self.meta_data_dict``, containing all the found meta data
    """
    ### determine beamline (and read the header) if a path was given
    if data_path:
        self.process_data(data_path=data_path)
    ### fill all the meta data contained in the files provided by the facilities
    ### into the dictionary
    self.meta_data_dict["Header"] = self.header
    self.meta_data_dict["Header_str"] = "\n".join(self.header)
    self.meta_data_dict["Beamline"] = self.beamline
    coll_code = ""

    ### SYNCHROTRON
    if self.beamline == "CATACT KIT":
        self.meta_data_dict["Facility"] = "KIT Light Source"
        ### the collection code is assembled from the #F, #E and #D lines
        ### and stored once a #D line is reached
        for line in self.header:
            line = line.replace("\n", "")
            if "#F" in line:
                coll_code += line.split()[-1] + " "
            elif "#E" in line:
                coll_code += line.split()[-1]
            elif "#D" in line:
                coll_code += line.replace("#D ", "")
                self.meta_data_dict["Coll.code"] = coll_code
            elif "#C" in line:
                self.meta_data_dict["Owner"] = line.split()[-1]
        ### no further needed (as for 20230314) meta data in file
    elif self.beamline == "PETRA III Extension Beamline P65":
        self.meta_data_dict["Facility"] = "DESY"
        self.meta_data_dict["Beamline"] = "P65"
        ### no further needed (as for 20230314) meta data in file
    elif self.beamline == "ELETTRA XAFS":
        self.meta_data_dict["Facility"] = "ELETTRA XAFS"
        self.meta_data_dict["Beamline"] = "XAFS"
        for line in self.header:
            line = line.replace("\n", "")
            if "Project Name" in line:
                self.meta_data_dict["Coll.code"] = line.split()[-1]
        ### no further needed (as for 20230314) meta data in file
    elif self.beamline == "SLRI":
        self.meta_data_dict["Facility"] = "SLRI"
        for line in self.header:
            line = line.replace("\n", "")
            if "#B" in line:
                ### text before the first colon, without the comment marker
                self.meta_data_dict["Beamline"] = line.split(":")[0].replace(
                    "#", ""
                )
            elif "#Transmission" in line:
                self.meta_data_dict["Acq. mode"] = "Transmission"
        ### no further needed (as for 20230314) meta data in file
    elif self.beamline == "ESRF BM 23":
        self.meta_data_dict["Facility"] = "ESRF"
        ### no further needed (as for 20230314) meta data in file
    elif self.beamline == "SOLEIL ROCK":
        self.meta_data_dict["Facility"] = "SOLEIL"
        for line in self.header:
            line = line.replace("\n", "")
            if "#Sample temperature" in line:
                try:
                    self.meta_data_dict["Temperature"] = int(line.split("=")[-1])
                except ValueError:
                    ### best effort: a value that is not an integer is
                    ### skipped, the temperature simply stays unset
                    pass
        ### no further needed (as for 20230314) meta data in file
    elif self.beamline == "SOLEIL SAMBA":
        self.meta_data_dict["Facility"] = "SOLEIL"
        ### no further meta data in file
    elif self.beamline == "SLS":
        self.meta_data_dict["Facility"] = "SLS"
        self.meta_data_dict["Beamline"] = "SLS SuperXAS"
        ### no further meta data in file
    elif self.beamline == "SOLARIS":
        self.meta_data_dict["Facility"] = "SOLARIS"
        self.meta_data_dict["Beamline"] = "PIRX"  # TODO
        ### no further meta data in file

    ### LABORATORY
    elif self.beamline == "TU Berlin":
        self.meta_data_dict["Facility"] = "TU Berlin"
        ### no further meta data in file

    ### For reference only (not used for the read out): the xafsdb widget
    ### meta data keys and their labels
    ###   owner_group: "Owner group", owner: "Owner",
    ###   contact_email: "Contact email", Abstract: "Abstract",
    ###   coll_code: "Coll.code", physical_state: "Phys.state",
    ###   crystal_orientation: "Crystal orientation",
    ###   temperature: "Temperature", pressure: "Pressure",
    ###   sample_environment: "Sample environment",
    ###   general_remarks: "General remarks", facility: "Facility",
    ###   beamline: "Beamline", aquisition_mode: "Acq. mode",
    ###   crystals: "Crystals", mirrors: "Mirrors", detectors: "Detectors",
    ###   element_input: "Element", edge_input: "Edge",
    ###   max_k_range: "Max k-range", doi: "DOI",
    ###   EnergyColumn: "Energy Column", I_zeroColumn: "I0 Column",
    ###   TransmissionColumn: "Transmission Column", MuColumn: "Mu Column",
    ###   reference: "Reference"
    return self.meta_data_dict

keV2eV(energy)

This function checks if the loaded energy is in eV. If it is not in eV, then the energy is most likely in keV and has to be converted from keV to eV by multiplying the given energy array by 1000.

Parameters

energy : array array which contains the energy.

Returns

the energy in eV

Source code in plugins/read_data.py
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
def keV2eV(self, energy):
    """
    Make sure the given energy axis is expressed in eV.

    The decision is based on the first value only: if it is below 100 the
    axis is assumed to be in keV and is multiplied by 1000, otherwise it
    is handed back unchanged.

    Parameters
    ----------
    energy : array
        array which contains the energy.

    Returns
    -------
    the energy in eV
    """
    ### guard clause: anything not below 100 is taken to be eV already
    if not energy[0] < 100:
        return energy
    return energy * 1000

load_data()

This function opens the data file and returns the data in the form of array([energy, mu]), depending on the detected beamline. This has to be adapted and extended if the beamline data format changes or new beamlines are to be supported.

Returns

numpy.array array([energy, mu])

Source code in plugins/read_data.py
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
def load_data(
    self,
):
    """
    Build ``self.data`` as array([energy, mu]) from the columns that larch
    read out, using the column layout of the detected beamline.

    The layouts below have to be adapted and extended if the beamline file
    formats change or new beamlines are to be supported. Afterwards the
    energy axis is converted to eV if necessary, the energy range is
    written to the meta data and the raw preview plot is created.

    Returns
    -------
    None
        The result is stored in ``self.data`` as array([energy, mu]).
    """
    group = self.larch_data
    raw = group.data

    ### beamlines delivering raw intensities:
    ### beamline -> (energy row, I0 row, transmission row)
    intensity_layout = {
        "CATACT KIT": (0, 6, 5),
        "PETRA III Extension Beamline P65": (1, 12, 11),
        "ELETTRA XAFS": (0, 3, 2),
        "SLRI": (0, 4, 3),
        "ESRF BM 23": (0, 2, 1),
        "SOLEIL SAMBA": (0, 8, 6),
        "SLS": (0, 3, 2),
    }
    ### beamlines (synchrotron and laboratory) whose files already hold
    ### energy in row 0 and mu in row 1
    mu_in_file = ("SOLEIL ROCK", "DELTA", "TU Berlin")

    if self.beamline in intensity_layout:
        energy_row, i0_row, transmission_row = intensity_layout[self.beamline]
        group.energy = raw[energy_row, :]
        group.I0 = raw[i0_row, :]
        group.transmission = raw[transmission_row, :]
        group.mu = np.log(group.transmission / group.I0)
    elif self.beamline in mu_in_file:
        group.energy = raw[0, :]
        group.mu = raw[1, :]
    elif self.beamline == "SOLARIS":
        ### only mu is derived here, from the d2 and sr attributes of the
        ### larch group; the energy attribute is used as it is
        group.mu = group.d2 / group.sr

    ### check unit of energy, if value below 100 it is most likely keV
    ### --> keV2eV changes it to eV by multiplying it with 1000
    group.energy = self.keV2eV(group.energy)
    ### convert data to numpy array with [energy, mu]
    self.data = np.array([group.energy, group.mu])
    ### set e-range from the first and the last energy point
    first_energy = self.data[0, 0]
    last_energy = self.data[0, -1]
    self.meta_data_dict["E_range_min"] = np.round(first_energy, decimals=1)
    self.meta_data_dict["E_range_max"] = np.round(last_energy, decimals=1)
    ### create raw plot
    self.create_raw_plot_base64()

load_hdf()

This function reads out h5 files. At the moment (20230614) only ESRF provides its measurement data as an h5 file for the user, so only the ESRF h5 format is supported. If more facilities provide h5 files, either a standard format has to be agreed on, or the facility has to be determined automatically and an adapted procedure has to be developed.

Source code in plugins/read_data.py
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
def load_hdf(
    self,
):
    """
    Read the measurement out of an h5 file.

    At the moment (20230614) only ESRF is providing their measurement data
    in an h5 file for the user, so only that layout is supported: energy
    and mu are taken from the "energy_cenc" and "mu_trans" entries of the
    "measurement" group of the selected scan. If more facilities provide
    h5 files, either a standard form is needed or the facility has to be
    determined automatically and an adapted procedure developed.
    """
    ### hdf files do not provide a text header
    self.header = ""

    ### an integer scan number means that no hdf5 scan name was provided,
    ### fall back to the default '1.1'
    if type(self.scan_number) is int:
        self.scan_number = "1.1"
        if self.verbose:
            print("No scan number for hdf5 files provided, setting to default 1.1")
    ### now open the h5 and retrieve the measurement data
    with h5py.File(self.data_path, "r") as h5_file:
        measurement = h5_file[self.scan_number]["measurement"]
        self.h5_energy = measurement["energy_cenc"][()]
        self.h5_mu = measurement["mu_trans"][()]
    ### make sure the energy is in eV
    self.h5_energy = self.keV2eV(self.h5_energy)
    ### convert data to numpy array with [energy, mu]
    self.data = np.array([self.h5_energy, self.h5_mu])
    ### set e-range from the first and the last energy point
    first_energy = self.data[0, 0]
    last_energy = self.data[0, -1]
    self.meta_data_dict["E_range_min"] = np.round(first_energy, decimals=1)
    self.meta_data_dict["E_range_max"] = np.round(last_energy, decimals=1)
    ### create raw plot
    self.create_raw_plot_base64()

load_specfile()

This function reads out spec files using the larch function read_specfile. The scan number has to be provided; otherwise the default scan number = 0 is selected and loaded.

Source code in plugins/read_data.py
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
def load_specfile(
    self,
):
    """
    Read a spec file with the larch function read_specfile, using
    ``self.scan_number`` as the scan to load.

    NOTE: mu is currently filled with the RingCurrent column, which is a
    known placeholder (see TODO below) and not a real absorption signal.
    """
    spec_group = read_specfile(self.data_path, scan=self.scan_number)
    self.larch_data = spec_group
    spec_group.energy = spec_group.Energy
    ###TODO this is not correct!!! No example data for implementation...
    spec_group.mu = spec_group.RingCurrent
    ### make sure the energy is in eV
    spec_group.energy = self.keV2eV(spec_group.energy)
    ### convert data to numpy array with [energy, mu]
    self.data = np.array([spec_group.energy, spec_group.mu])
    ### set e-range from the first and the last energy point
    first_energy = self.data[0, 0]
    last_energy = self.data[0, -1]
    self.meta_data_dict["E_range_min"] = np.round(first_energy, decimals=1)
    self.meta_data_dict["E_range_max"] = np.round(last_energy, decimals=1)
    ### create raw plot
    self.create_raw_plot_base64()

print_mu()

Important function to draw a cow saying mu.

Source code in plugins/read_data.py
550
551
552
553
554
555
556
557
558
559
560
561
def print_mu(self):
    """
    Important function to draw a cow saying mu.

    Prints the ASCII picture followed by a confirmation line to stdout.
    """
    ### raw strings: the picture contains backslashes that would otherwise
    ### be read as (invalid) escape sequences
    picture = (
        r"|¯¯¯¯¯|¯¯¯¯¯|¯¯¯¯¯|¯¯¯¯¯|",
        r"|\|/          (__)      |",
        r"|     `\------(oo)      |",
        r"|       ||    (__) <(mu)|",
        r"|       ||w--||     \|/ |",
        r"|   \|/                 |",
        r"|¯¯¯¯¯|¯¯¯¯¯|¯¯¯¯¯|¯¯¯¯¯|",
    )
    for row in picture:
        print(row)
    print("QC succesfully performed")

process_data(data_path)

This function determines the type of the data file from its extension. If it is an hdf file, it is opened with h5py; otherwise the data file is opened, its lines are read and checked for distinct keywords, and the readout is started.

Parameters

data_path : str absolute path to the xdi file.

Source code in plugins/read_data.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def process_data(self, data_path):
    """
    Determine the type of the data file from its extension and start the
    matching readout: hdf files are loaded with h5py, spec files with the
    larch spec reader, and everything else is read as a text file and
    checked for the beamline keywords.

    Parameters
    ----------
    data_path : str
        absolute path to the data file.
    """
    self.data_path = data_path
    ### text after the last dot, i.e. the extension WITHOUT a leading dot
    self.data_type = self.data_path.split(".")[-1]
    ### compare case-insensitively so that e.g. "scan.H5" is recognised;
    ### self.data_type itself keeps the spelling found in the path
    extension = self.data_type.lower()
    if extension in ["h5", "hdf", "hdf5"]:
        self.load_hdf()
    elif extension in ["spec"]:
        self.load_specfile()
    else:
        self.read_out_file()

read_out_file()

This function reads every line of the file header and checks whether any known keyword is in the file. When successful, the beamline is stored and the specified load_data function is called.

Source code in plugins/read_data.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
def read_out_file(
    self,
):
    """
    Read a text data file with larch and try to identify the beamline from
    keywords in its header.

    The default source type (set by ``reset_2_default``) is searched
    first; the remaining source types are only searched if nothing was
    found there, and ``self.source`` is updated to the source type of the
    detected beamline. Within one source type the last matching header
    line / beamline wins.

    When a beamline is found, the header meta data are extracted and the
    beamline specific ``load_data`` is called. Otherwise the first two
    columns of the file are taken as energy and mu.
    """
    ### resetting to default, clear all data
    self.reset_2_default()
    ### use larch to read out data and the header of the file
    self.larch_data = read_ascii(self.data_path)
    self.header = self.larch_data.header
    ### now check the header for keywords for the specific beamlines,
    ### starting with the default source type
    sources = [self.source] + [
        source for source in self.supported_beamlines if source != self.source
    ]
    for source in sources:
        for line in self.header:
            for beamline in self.supported_beamlines[source]:
                keywords = self.beamline_key_words[source][beamline]
                if any(keyword in line for keyword in keywords):
                    self.beamline = beamline
        if self.beamline:
            ### remember where the beamline was found and do not look
            ### into the remaining source types
            self.source = source
            break
    ### if a beamline was found, use the beamline specific readout
    if self.beamline:
        print("beamline found:\t", self.beamline)
        self.extract_header()
        self.load_data()
    else:
        print("no beamline found, going to numpy extraction mode")
        ### reuse the already parsed file; copy so that the unit
        ### conversion below does not modify the larch group
        self.data = np.array(self.larch_data.data[:2])
        ### check unit of energy, if value below 100 it is most likely keV
        ### --> keV2eV changes it to eV by multiplying it with 1000
        self.data[0] = self.keV2eV(self.data[0])
        self.meta_data_dict["E_range_min"] = self.data[0, 0]
        self.meta_data_dict["E_range_max"] = self.data[0, -1]
        self.create_raw_plot_base64()

reset_2_default()

Function to set all relevant data to default.

Source code in plugins/read_data.py
116
117
118
119
120
121
122
123
124
125
126
127
def reset_2_default(
    self,
):
    """
    Put the reader back into its initial state: SYNCHROTRON as source
    type, an empty meta data dictionary and no detected beamline.
    """
    ### SYNCHROTRON is the default source type
    self.source = "SYNCHROTRON"
    ### drop all meta data collected so far
    self.meta_data_dict = {}
    ### None marks the beamline as undetected
    self.beamline = None