Commit db4d9476 authored by Panagiotis Papadakos's avatar Panagiotis Papadakos
Browse files

[Config] Add parameter for default model. Also remove forgotten files

parent c7dff914
......@@ -121,6 +121,15 @@ public class Config {
return __PROP__.getProperty("META_FILENAME");
}
/**
 * Looks up the default retrieval model from the loaded configuration.
 *
 * @return the value stored under the {@code RETRIEVAL_MODEL} key (expected to
 * be either VSM or BM25)
 */
public String getRetrievalModel() {
    final String retrievalModel = __PROP__.getProperty("RETRIEVAL_MODEL");
    return retrievalModel;
}
/**
* Returns whether we should use the stemmer
*
......
/*
* themis - A fair search engine for scientific articles
*
* Currently over the Semantic Scholar Open Research Corpus
* http://s2-public-api-prod.us-west-2.elasticbeanstalk.com/corpus/
*
* Collaborative work with the undergraduate/graduate students of
* Information Retrieval Systems (hy463) course
* Spring Semester 2020
*
* -- Writing code during COVID-19 pandemic times :-( --
*
* Aiming to participate in TREC 2020 Fair Ranking Track
* https://fair-trec.github.io/
*
* Computer Science Department http://www.csd.uoc.gr
* University of Crete
* Greece
*
* LICENCE: TO BE ADDED
*
* Copyright 2020
*
*/
package gr.csd.uoc.hy463.themis.model;
import java.util.HashMap;
import java.util.Map;
/**
* This class holds any information we might want to communicate with the
* retrieval model we are implementing about a specific document
*
* Currently just holds the important things.
*
* Can also be extended by another class, DocInfoFull, that will be used
* whenever we want to get all information related with a document, i.e. the
* entry of a document in the Documents file
*
* Since we are probably going to store a lot of these objects in memory, we
* have to be as memory efficient as we can. This implementation with a map is
* worse than just keeping all properties as primitives and private members, but
* it seems to be simpler to interact with
*
* ID and offset are set only in the constructor
*
* @author Panagiotis Papadakos <papadako at ics.forth.gr>
*/
public class DocInfoEssential {
public enum PROPERTY {
PAGERANK, // pagerank score for 2nd phase (Value should be double)
WEIGHT, // weight (norm) of document VSM (Value should be double)
LENGTH // for OkapiBM25 (Value should be integer)
}
protected String id = ""; // the 40 byte id
protected long offset = 0; // offset in documents file
// The size of the hashmap is only 3.... since up to now we have 3 properties to hold
protected final Map<PROPERTY, Object> props = new HashMap<>(3);
public DocInfoEssential(String id, long offset) {
this.id = id;
this.offset = offset;
}
/**
* Set property for this docID. Properties come from the PROPERY enum and
* value is an object
*
* @param prop
* @param value
*/
public void setProperty(DocInfoEssential.PROPERTY prop, Object value) {
props.put(prop, value);
}
/**
* Return the value of the property
*
* @param prop
* @return
*/
public Object getProperty(DocInfoEssential.PROPERTY prop) {
return props.get(prop);
}
public String getId() {
return id;
}
public long getOffset() {
return offset;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
DocInfoEssential other = (DocInfoEssential) o;
return this.id == other.id;
}
@Override
public int hashCode() {
return id.hashCode();
}
}
/*
* themis - A fair search engine for scientific articles
*
* Currently over the Semantic Scholar Open Research Corpus
* http://s2-public-api-prod.us-west-2.elasticbeanstalk.com/corpus/
*
* Collaborative work with the undergraduate/graduate students of
* Information Retrieval Systems (hy463) course
* Spring Semester 2020
*
* -- Writing code during COVID-19 pandemic times :-( --
*
* Aiming to participate in TREC 2020 Fair Ranking Track
* https://fair-trec.github.io/
*
* Computer Science Department http://www.csd.uoc.gr
* University of Crete
* Greece
*
* LICENCE: TO BE ADDED
*
* Copyright 2020
*
*/
package gr.csd.uoc.hy463.themis.model;
/**
* This class could be used when we want to get all information of a specific
* document, e.g. title, authors, etc.
*
* @author Panagiotis Papadakos <papadako at ics.forth.gr>
*/
public class DocInfoFull extends DocInfoEssential {

    /**
     * Creates the full info holder of a document.
     *
     * @param id the id of the document
     * @param offset the offset of the document in the documents file
     */
    public DocInfoFull(String id, long offset) {
        super(id, offset);
    }

    /**
     * Two objects are equal when they are of exactly the same class and have
     * equal ids.
     *
     * @param o the object to compare with
     * @return {@code true} if the other object has the same class and id
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        DocInfoEssential other = (DocInfoEssential) o;
        // Compare the contents of the ids: == on Strings only checks whether
        // both references point to the same object
        return id == null ? other.id == null : id.equals(other.id);
    }

    /**
     * Hash code based only on the id, consistent with {@link #equals(Object)}.
     *
     * @return the hash code of the id, or 0 if the id is {@code null}
     */
    @Override
    public int hashCode() {
        return id == null ? 0 : id.hashCode();
    }
}
......@@ -6,6 +6,9 @@ USE_STOPWORDS = true
PARTIAL_INDEX_MAX_DOCS_SIZE = 200000
MAX_MEMORY = 2GB
# Default retrieval model (VSM or BM25)
RETRIEVAL_MODEL = BM25
# Related to indices file names
INDEX_PATH = /home/papadako/index/
DATASET_PATH = /home/papadako/dataset
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment