49 #ifndef _CINVERTEDFILEACCESSOR
50 #define _CINVERTEDFILEACCESSOR
51 #include "libGIFTAcInvertedFile/include/uses-declarations.h"
53 #include "libMRML/include/TID.h"
54 #include "libMRML/include/CSelfDestroyPointer.h"
55 #include "libMRML/include/CArraySelfDestroyPointer.h"
56 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
57 #include "CCollectionFrequencyList.h"
58 #include "libGIFTAcInvertedFile/include/CADIHash.h"
59 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h"
72 #include "libMRML/include/CMagic.h"
75 typedef TID TFeatureID ;
132 ostream& inOpenOffsetFile);
164 TID
URLToID(
const string& inURL)
const;
231 double inDocumentFrequency)
const;
virtual double DIDToMaxDocumentFrequency(TID) const =0
Returns the maximum document frequency for one document ID.
virtual pair< bool, TID > URLToID(const string &inURL) const =0
Translate a URL to its document ID.
CIDToOffset mIDToOffset
map from feature id to the offset for this feature
Definition: CAcSQLInvertedFile.h:112
TID getMaximumFeatureID() const
This is interesting for browsing.
bool init(bool)
called by constructors
This class captures the structure of an XML element.
Definition: CXMLElement.h:51
A list of Document Frequency Elements (the main part of an inverted file)
Definition: CDocumentFrequencyList.h:58
string mFeatureDescriptionFileName
Name for the file with the feature description.
Definition: CAcSQLInvertedFile.h:107
virtual list< TID > * getAllFeatureIDs() const =0
Getting a list of all features contained in this.
virtual bool checkConsistency()=0
Check the consistency of the inverted file system accessed by this accessor.
bool findWithinStream(TID inFeatureID, TID inDocumentID, double inDocumentFrequency) const
Is the Document with inDocumentID contained in the document frequency list of the feature inFeatureID...
virtual double DIDToSquareDFLogICFSum(TID) const =0
Returns this function for a given document ID.
virtual unsigned int getFeatureDescription(TID inFeatureID) const =0
What kind of feature is the feature with ID inFeatureID?
~CAcInvertedFile()
Destructor.
TID mMaximumFeatureID
the maximum feature ID arising in this file
Definition: CAcSQLInvertedFile.h:87
CADIHash.
Definition: CADIHash.h:53
An accessor to an inverted file.
Definition: CAcInvertedFile.h:83
virtual double DIDToDFSquareSum(TID) const =0
Returns the document-frequency square sum for a given document ID.
CDocumentFrequencyList * getFeatureFile(string inFileName) const
loads a *.fts file.
CAcInvertedFile(const CXMLElement &inCollectionElement)
This opens an existing inverted file and then initializes this structure.
virtual double FeatureToCollectionFrequency(TFeatureID) const =0
Collection frequency for a given feature.
virtual CDocumentFrequencyList * FeatureToList(TFeatureID inFID) const =0
Give the List of documents containing the feature inFID.
virtual bool operator()() const =0
Tests whether the inverted file is correctly constructed.
virtual bool generateInvertedFile()=0
Generates an inverted file if there is none.
hash_map< TID, double > mFeatureToCollectionFrequency
map from feature to the collection frequency
Definition: CAcSQLInvertedFile.h:115
CSelfDestroyPointer< istream > mInvertedFile
The inverted file.
Definition: CAcSQLInvertedFile.h:92
hash_map< TID, unsigned int > CIDToOffset
map from feature id to the offset for this feature
Definition: CAcSQLInvertedFile.h:110
ifstream mOffsetFile
Feature -> Offset in inverted file.
Definition: CAcSQLInvertedFile.h:95
hash_map< TID, unsigned int > mFeatureDescription
map from the feature ID to the feature description
Definition: CAcSQLInvertedFile.h:120
CADIHash mDocumentInformation
additional information about the document like, e.g.
Definition: CAcSQLInvertedFile.h:125
virtual CDocumentFrequencyList * DIDToFeatureList(TID inDID) const =0
List of features contained by a document with ID inDID.
string mInvertedFileName
Name of the inverted file.
Definition: CAcSQLInvertedFile.h:101
ifstream mFeatureDescriptionFile
File of feature descriptions.
Definition: CAcSQLInvertedFile.h:98
CArraySelfDestroyPointer< char > mInvertedFileBuffer
A buffer, if the inverted file is to be held in ram.
Definition: CAcSQLInvertedFile.h:90
This accessor is a base class for accessors which use an URL2FTS file to implement the interface of t...
Definition: CAcURL2FTS.h:66
virtual CDocumentFrequencyList * URLToFeatureList(string inURL) const =0
List of features contained by a document with URL inURL.
void writeOffsetFileElement(TID inFeatureID, int inPosition, ostream &inOpenOffsetFile)
add a pair of FeatureID,Offset to the open offset file (helper function for inverted file constructio...
string mOffsetFileName
Name of the Offset file.
Definition: CAcSQLInvertedFile.h:104
virtual string IDToURL(TID inID) const =0
bool newGenerateInvertedFile()
Generates an inverted file if there is none.