Commit 7b9809ed authored by Panagiotis Papadakos's avatar Panagiotis Papadakos
Browse files

[Entries - Textual information] Add code to read json entries from Semantic...

[Entries - Textual information] Add code to read json entries from Semantic Scholar Open Research Corpus
parents
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the hy463-fairness-trec-2020 artifact, packaged as a jar. -->
<modelVersion>4.0.0</modelVersion>
<groupId>gr.csd.uoc.hy463</groupId>
<artifactId>hy463-fairness-trec-2020</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<dependencies>
<!-- json-simple supplies the org.json.simple classes (JSONParser, JSONObject, JSONArray)
     imported by S2JsonEntryReader. -->
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
<version>1.1.1</version>
<type>jar</type>
</dependency>
</dependencies>
<properties>
<!-- Source files are read as UTF-8. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Compile for Java 13, both language level and bytecode target. -->
<maven.compiler.source>13</maven.compiler.source>
<maven.compiler.target>13</maven.compiler.target>
</properties>
</project>
\ No newline at end of file
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package gr.csd.uoc.hy463.fairness.trec.model;
import java.util.List;
/**
 * Mutable holder for the textual information of one entry (paper) read from
 * the Semantic Scholar Open Research Corpus.
 *
 * <p>Every reference field defaults to {@code null} and {@code year} defaults
 * to {@code 0}; both mean "not set" and are omitted by {@link #toString()}.
 *
 * @author papadako
 */
public class S2TextualEntry {

    private String id = null;
    private String title = null;
    private String paperAbstract = null;
    private List<String> entities = null;
    private List<String> fieldsOfStudy = null;
    private List<String> authors = null;
    private int year = 0;
    private String venue = null;
    private String journalName = null;
    private List<String> sources = null;

    /** @return the entry identifier, or {@code null} if not set */
    public String getId() {
        return id;
    }

    /** @param id the entry identifier */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the title, or {@code null} if not set */
    public String getTitle() {
        return title;
    }

    /** @param title the title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the abstract text, or {@code null} if not set */
    public String getPaperAbstract() {
        return paperAbstract;
    }

    /** @param paperAbstract the abstract text */
    public void setPaperAbstract(String paperAbstract) {
        this.paperAbstract = paperAbstract;
    }

    /** @return the entities, or {@code null} if not set */
    public List<String> getEntities() {
        return entities;
    }

    /** @param entities the entities; the list is stored as-is, not copied */
    public void setEntities(List<String> entities) {
        this.entities = entities;
    }

    /** @return the fields of study, or {@code null} if not set */
    public List<String> getFieldsOfStudy() {
        return fieldsOfStudy;
    }

    /** @param fieldsOfStudy the fields of study; the list is stored as-is, not copied */
    public void setFieldsOfStudy(List<String> fieldsOfStudy) {
        this.fieldsOfStudy = fieldsOfStudy;
    }

    /** @return the author names, or {@code null} if not set */
    public List<String> getAuthors() {
        return authors;
    }

    /** @param authors the author names; the list is stored as-is, not copied */
    public void setAuthors(List<String> authors) {
        this.authors = authors;
    }

    /** @return the publication year, or {@code 0} if not set */
    public int getYear() {
        return year;
    }

    /** @param year the publication year */
    public void setYear(int year) {
        this.year = year;
    }

    /** @return the venue, or {@code null} if not set */
    public String getVenue() {
        return venue;
    }

    /** @param venue the venue */
    public void setVenue(String venue) {
        this.venue = venue;
    }

    /** @return the journal name, or {@code null} if not set */
    public String getJournalName() {
        return journalName;
    }

    /** @param journalName the journal name */
    public void setJournalName(String journalName) {
        this.journalName = journalName;
    }

    /** @return the sources, or {@code null} if not set */
    public List<String> getSources() {
        return sources;
    }

    /** @param sources the sources; the list is stored as-is, not copied */
    public void setSources(List<String> sources) {
        this.sources = sources;
    }

    /**
     * Renders the fields that are set, one per line, in the order: title,
     * abstract, entities, fields of study, authors, journal name, venue,
     * sources, year. The id is not part of the output.
     *
     * <p>List values are joined with a single comma between elements (no
     * leading or trailing separator).
     *
     * @return a multi-line, human-readable description; empty if nothing is set
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        appendValue(sb, "Title: ", title);
        appendValue(sb, "Abstract: ", paperAbstract);
        appendList(sb, "Entities:", entities);
        appendList(sb, "Fields Of Study:", fieldsOfStudy);
        appendList(sb, "Author Names:", authors);
        appendValue(sb, "Journal Name: ", journalName);
        appendValue(sb, "Venue: ", venue);
        appendList(sb, "Sources:", sources);
        // 0 is the "unset" default for the primitive year field.
        if (year != 0) {
            sb.append("Year: ").append(year).append("\n");
        }
        return sb.toString();
    }

    /** Appends {@code label + value + newline} when {@code value} is non-null. */
    private static void appendValue(StringBuilder sb, String label, String value) {
        if (value != null) {
            sb.append(label).append(value).append("\n");
        }
    }

    /** Appends {@code label}, the comma-joined {@code values} and a newline when {@code values} is non-null. */
    private static void appendList(StringBuilder sb, String label, List<String> values) {
        if (values != null) {
            // String.join places the separator strictly between elements, which the
            // previous hand-rolled loop got wrong (it produced e.g. "ab,c,").
            sb.append(label).append(String.join(",", values)).append("\n");
        }
    }
}
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package gr.csd.uoc.hy463.fairness.trec.utils;
import gr.csd.uoc.hy463.fairness.trec.model.S2TextualEntry;
import java.util.ArrayList;
import java.util.List;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
/**
 * Class responsible for reading textual entries from the json description of
 * entries
 *
 * TODO: you have to update the code so that you can read all the relevant info
 *
 * @author papadako
 */
public class S2JsonEntryReader {

    /**
     * Reads the textual information of one entry from its JSON description.
     *
     * <p>The keys read are {@code id}, {@code title}, {@code paperAbstract},
     * {@code entities}, {@code fieldsOfStudy}, {@code authors} (the
     * {@code name} of each author object), {@code journalName},
     * {@code sources}, {@code year} and {@code venue}. Each key is read
     * independently: a key that is absent, {@code null} or of an unexpected
     * type leaves the corresponding field of the entry unset and does not
     * prevent the remaining keys from being read.
     *
     * <p>This method never throws. If the text cannot be parsed, or its top
     * level is not a JSON object, the stack trace is printed and an entry
     * with no fields set is returned.
     *
     * @param jsonToRead the JSON text of a single entry
     * @return the entry populated with whatever could be read; never {@code null}
     */
    public static S2TextualEntry readTextualEntry(String jsonToRead) {
        S2TextualEntry entry = new S2TextualEntry();
        JSONParser parser = new JSONParser();
        try {
            // The top level of an entry should be a JSON object.
            JSONObject jsonObject = (JSONObject) parser.parse(jsonToRead);

            entry.setId(readString(jsonObject, "id"));
            entry.setTitle(readString(jsonObject, "title"));
            entry.setPaperAbstract(readString(jsonObject, "paperAbstract"));
            entry.setEntities(readStringArray(jsonObject, "entities"));
            entry.setFieldsOfStudy(readStringArray(jsonObject, "fieldsOfStudy"));
            entry.setAuthors(readAuthorNames(jsonObject));
            entry.setJournalName(readString(jsonObject, "journalName"));
            entry.setSources(readStringArray(jsonObject, "sources"));

            // Accept any numeric type for the year; a missing year keeps the default.
            Object year = jsonObject.get("year");
            if (year instanceof Number) {
                entry.setYear(((Number) year).intValue());
            }

            entry.setVenue(readString(jsonObject, "venue"));
        } catch (Exception e) {
            // Best-effort boundary: malformed input (parse failure, null text,
            // non-object top level) is reported and an empty entry is returned.
            e.printStackTrace();
        }
        return entry;
    }

    /** Returns the value under {@code key} if it is a string, otherwise {@code null}. */
    private static String readString(JSONObject json, String key) {
        Object value = json.get(key);
        return (value instanceof String) ? (String) value : null;
    }

    /**
     * Returns the string form of each non-null element of the array under
     * {@code key}, or {@code null} if the key does not hold an array.
     */
    private static List<String> readStringArray(JSONObject json, String key) {
        Object value = json.get(key);
        if (!(value instanceof JSONArray)) {
            return null;
        }
        List<String> result = new ArrayList<>();
        for (Object element : (JSONArray) value) {
            if (element != null) {
                result.add(element.toString());
            }
        }
        return result;
    }

    /**
     * Returns the {@code name} of every author object in the {@code authors}
     * array, skipping elements that are not objects or have no string name;
     * {@code null} if {@code authors} does not hold an array.
     */
    private static List<String> readAuthorNames(JSONObject json) {
        Object value = json.get("authors");
        if (!(value instanceof JSONArray)) {
            return null;
        }
        List<String> names = new ArrayList<>();
        for (Object element : (JSONArray) value) {
            if (element instanceof JSONObject) {
                Object name = ((JSONObject) element).get("name");
                if (name instanceof String) {
                    names.add((String) name);
                }
            }
        }
        return names;
    }

    /**
     * Small demo: prints a sample entry's JSON and the entry parsed from it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String json = "{\n"
                + " \"id\": \"4cd223df721b722b1c40689caa52932a41fcc223\",\n"
                + " \"title\": \"Knowledge-rich, computer-assisted composition of Chinese couplets\",\n"
                + " \"paperAbstract\": \"Recent research effort in poem composition has focused on the use of\n"
                + " automatic language generation...\",\n"
                + " \"entities\": [\n"
                + " \"Conformance testing\",\n"
                + " \"Natural language generation\",\n"
                + " \"Natural language processing\",\n"
                + " \"Parallel computing\",\n"
                + " \"Stochastic grammar\",\n"
                + " \"Web application\"\n"
                + " ],\n"
                + " \"fieldsOfStudy\": [\n"
                + " \"Computer Science\"\n"
                + " ],\n"
                + " \"s2Url\": \"https://semanticscholar.org/paper/4cd223df721b722b1c40689caa52932a41fcc223\",\n"
                + " \"s2PdfUrl\": \"\",\n"
                + " \"pdfUrls\": [\n"
                + " \"https://doi.org/10.1093/llc/fqu052\"\n"
                + " ],\n"
                + " \"authors\": [\n"
                + " {\n"
                + " \"name\": \"John Lee\",\n"
                + " \"ids\": [\n"
                + " \"3362353\"\n"
                + " ]\n"
                + " }\n"
                + " ],\n"
                + " \"inCitations\": [\n"
                + " \"c789e333fdbb963883a0b5c96c648bf36b8cd242\"\n"
                + " ],\n"
                + " \"outCitations\": [\n"
                + " \"abe213ed63c426a089bdf4329597137751dbb3a0\",\n"
                + " \"...\"\n"
                + " ],\n"
                + " \"year\": 2016,\n"
                + " \"venue\": \"DSH\",\n"
                + " \"journalName\": \"DSH\",\n"
                + " \"journalVolume\": \"31\",\n"
                + " \"journalPages\": \"152-163\",\n"
                + " \"sources\": [\n"
                + " \"DBLP\"\n"
                + " ],\n"
                + " \"doi\": \"10.1093/llc/fqu052\",\n"
                + " \"doiUrl\": \"https://doi.org/10.1093/llc/fqu052\",\n"
                + " \"pmid\": \"\"\n"
                + "}";
        System.out.println(json);
        System.out.println(S2JsonEntryReader.readTextualEntry(json));
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment