# alma_digital_representation.py

# author: wieg
# 21.01.2021

# Script for testing the Alma Bibs API (digital representations)
# For each MMS ID in mms_list, looks up the ID of the first representation
# and the ID of the first file of that representation, then downloads the
# file from the Alma delivery URL and saves it as images/<MMS ID>.jpg

import requests
import json
import configparser
from pathlib import Path

# The configuration file is expected to sit in the same directory as this script
CONFIG_FILE_NAME = "config.ini"
PROJECT_DIRECTORY_PATH = Path(__file__).resolve().parents[0]
CONFIG_FILE_PATH = PROJECT_DIRECTORY_PATH / CONFIG_FILE_NAME

# Parse the configuration file (a missing file is silently ignored by read())
config = configparser.ConfigParser()
config.read([CONFIG_FILE_PATH])

# Host of the Alma REST API and the Bibs endpoint below it
ALMA_API_HOST = "https://api-eu.hosted.exlibrisgroup.com"
ALMA_API_BIBS = f"{ALMA_API_HOST}/almaws/v1/bibs/"

# The API key is read from the [API] section of the loaded configuration
ALMA_API_KEY = config["API"]["apikey"]

# Headers sent with every request: JSON in both directions plus the API key
HEADERS = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": f"apikey {ALMA_API_KEY}",
}

# MMS IDs whose digital representations should be looked up
mms_list = [
    "990104431070205508",
    "990106380350205508",
]

# Collects, per MMS ID, the representation ID and the file ID that were found:
# { mms_id : [ representation_id, file_id ], mms_id : [ representation_id, file_id ] }
rep_id_dict = dict()

# For every MMS ID, look up the ID of its first representation and the ID of
# that representation's first file, and record both in rep_id_dict.
# As soon as one step fails, the remaining steps for that MMS ID are skipped,
# so no step ever runs with a missing value or with a value left over from
# the previous iteration.
for mms in mms_list:
    # Send one GET request per MMS ID to the representations API
    requesturl_rep = ALMA_API_BIBS + "{mms_id}/representations".format(mms_id=mms)
    print("Request URL is", requesturl_rep)
    try:
        # Explicit timeout: requests would otherwise wait indefinitely
        response_rep = requests.get(requesturl_rep, headers=HEADERS, timeout=30)
    except requests.exceptions.RequestException as e:
        print(e)
        continue

    # Process the response
    # Retrieve representation ID
    try:
        # Deserialize json string to a python dict
        response_rep_dict = json.loads(response_rep.text)
        rep_id = response_rep_dict['representation'][0]['id']
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # The body is not valid JSON or does not contain a representation
        print(e)
        continue

    # Add representation ID to previously created dictionary
    # Each MMS ID key has a list as value
    # The list will be filled with two items: representation ID and file ID
    rep_id_dict[mms] = [rep_id]
    print("Representation ID is", rep_id_dict[mms][0])

    # Make another request to the representation files API to retrieve file ID
    requesturl_file = ALMA_API_BIBS + "{mms_id}/representations/{rep_id}/files".format(mms_id=mms, rep_id=rep_id)
    print("URL for the files API request:", requesturl_file)
    try:
        response_file = requests.get(requesturl_file, headers=HEADERS, timeout=30)
    except requests.exceptions.RequestException as e:
        print(e)
        continue
    print("Status code:", response_file.status_code)
    print(response_file.text)

    # Process the response
    # Retrieve file ID
    try:
        # Deserialize json string to a python dict
        response_file_dict = json.loads(response_file.text)
        file_id = response_file_dict['representation_file'][0]['pid']
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # The body is not valid JSON or does not contain a file; the entry
        # for this MMS ID then keeps only its representation ID
        print(e)
        continue

    # Add file ID as second item to the list corresponding to the MMS key
    rep_id_dict[mms].append(file_id)
    print("File ID is", rep_id_dict[mms][1])

# Make one GET request to the delivery URL per entry in rep_id_dict and save
# the returned file as images/<MMS ID>.jpg (relative to the working directory)
for key, ids in rep_id_dict.items():
    # An entry holds only the representation ID when no file ID was found;
    # without both IDs the delivery URL cannot be built
    if len(ids) < 2:
        print("No file ID stored for MMS ID", key, "- skipping download")
        continue
    rep_id, file_id = ids[0], ids[1]

    requesturl_img = 'https://slsp-uzb.alma.exlibrisgroup.com/view/delivery/41SLSP_UZB/{rep_id}/{file_id}'.format(rep_id=rep_id, file_id=file_id)
    # Pass only the representation ID here; passing the whole [rep_id, file_id]
    # list would send the RepresentationID parameter twice
    params = {"MMSID": key, "RepresentationID": rep_id}
    # print("Params are:", params)
    # NOTE(review): HEADERS (including the API key) is also sent to this
    # delivery host - confirm that this is intended
    try:
        # Explicit timeout: requests would otherwise wait indefinitely
        with requests.get(requesturl_img, params=params, headers=HEADERS, stream=True, timeout=30) as response_img:
            # Process the response
            print("Status code:", response_img.status_code)
            # Do not store an error page as an image file
            response_img.raise_for_status()
            # Make sure the target directory exists before writing into it
            Path('images').mkdir(parents=True, exist_ok=True)
            with open('images/{mms}.jpg'.format(mms=key), mode='wb') as imagefile:
                # Write the body chunk by chunk instead of loading it into
                # memory at once, which is what stream=True is meant for
                for chunk in response_img.iter_content(chunk_size=8192):
                    imagefile.write(chunk)
    except (requests.exceptions.RequestException, OSError) as e:
        print(e)

# Get file out of Alma
# https://slsp-uzb.alma.exlibrisgroup.com/view/delivery/41SLSP_UZB/REPRESENTATION_ID/FILE_ID