#!/usr/bin/env python

# carrel2tsv.py - given a few configurations, output bibliographics in the form of TSV

# Eric Lease Morgan
# (c) University of Notre Dame; distributed under a GNU Public License

# April 12, 2024 - first cut; illustrating the functionality of HTTP content negotiation


# configure
CARREL = 'subject-englandAndWales-freebo'
HOST = 'http://carrels.distantreader.org'
MIMETYPE = 'application/json'
CACHE = 'cache'
COLUMNS = ['author', 'title', 'url']
TIMEOUT = 30  # seconds; keeps the script from hanging forever on a stalled server

# require
from json import loads
from pandas import DataFrame


def fetch_metadata(host=HOST, carrel=CARREL, mimetype=MIMETYPE, timeout=TIMEOUT):
    """Request a carrel from the host and return its parsed JSON metadata.

    The representation is selected through HTTP content negotiation: the
    given MIME type is sent in the ``Accept`` header of a GET request to
    ``host + '/' + carrel``.

    Args:
        host: Root URL of the server.
        carrel: Name of the carrel, appended to the host to form the URL.
        mimetype: Value of the ``Accept`` request header.
        timeout: Number of seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
    """

    # imported here so the pure helpers below stay importable (and testable)
    # on systems where requests is not installed
    from requests import get

    # build a request, submit it, and get the content of the response
    response = get(host + '/' + carrel, headers={'Accept': mimetype}, timeout=timeout)

    # fail loudly on an HTTP error instead of trying to parse an error page
    response.raise_for_status()

    return loads(response.text)


def build_records(items, host=HOST, carrel=CARREL, cache=CACHE):
    """Turn metadata items into ``[author, title, url]`` records.

    Each item is indexed by the keys ``'id'``, ``'author'``, ``'title'``, and
    ``'extension'``. The URL of an item is the host, carrel, cache directory,
    and the identifier immediately followed by the extension, joined with
    slashes.

    Args:
        items: Iterable of mappings, one per item in the carrel.
        host: Root URL of the server.
        carrel: Name of the carrel.
        cache: Path segment placed between the carrel and the file name.

    Returns:
        A list of three-element lists, in the same order as ``items``.

    Raises:
        KeyError: If an item lacks one of the required keys.
    """

    return [
        [
            item['author'],
            item['title'],
            '/'.join([host, carrel, cache, item['id'] + item['extension']]),
        ]
        for item in items
    ]


def to_tsv(records, columns=COLUMNS):
    """Serialize records as tab-separated text with a header row.

    Args:
        records: List of rows, each with one value per column.
        columns: Column names used for the header row.

    Returns:
        The TSV text, without an index column.
    """

    return DataFrame(records, columns=columns).to_csv(index=False, sep='\t')


def main():
    """Fetch the configured carrel's metadata and print it as TSV."""

    print(to_tsv(build_records(fetch_metadata())))


# do the work only when run as a script, not when imported
if __name__ == '__main__':
    main()