|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  cc.mallet.topics.ParallelTopicModel
public class ParallelTopicModel
Simple parallel threaded implementation of LDA, following the UCI NIPS paper, with SparseLDA sampling scheme and data structure.
Field Summary | |
---|---|
protected double[] |
alpha
|
protected Alphabet |
alphabet
|
protected double |
alphaSum
|
protected double |
beta
|
protected double |
betaSum
|
int |
burninPeriod
|
protected java.util.ArrayList<TopicAssignment> |
data
|
static double |
DEFAULT_BETA
|
protected int[] |
docLengthCounts
|
protected java.text.NumberFormat |
formatter
|
protected java.lang.String |
modelFilename
|
int |
numIterations
|
protected int |
numTopics
|
protected int |
numTypes
|
int |
optimizeInterval
|
protected boolean |
printLogLikelihood
|
protected int |
randomSeed
|
protected int |
saveModelInterval
|
int |
saveSampleInterval
|
protected int |
saveStateInterval
|
int |
showTopicsInterval
|
protected java.lang.String |
stateFilename
|
protected int[] |
tokensPerTopic
|
protected LabelAlphabet |
topicAlphabet
|
protected int |
topicBits
|
protected int[][] |
topicDocCounts
|
protected int |
topicMask
|
protected int[][] |
typeTopicCounts
|
int |
wordsPerTopic
|
Constructor Summary | |
---|---|
ParallelTopicModel(int numberOfTopics)
|
|
ParallelTopicModel(int numberOfTopics,
double alphaSum,
double beta)
|
|
ParallelTopicModel(LabelAlphabet topicAlphabet,
double alphaSum,
double beta)
|
Method Summary | |
---|---|
void |
addInstances(InstanceList training)
|
void |
buildInitialTypeTopicCounts()
|
void |
estimate()
|
Alphabet |
getAlphabet()
|
java.util.ArrayList<TopicAssignment> |
getData()
|
TopicInferencer |
getInferencer()
|
int |
getNumTopics()
|
java.util.TreeSet[] |
getSortedWords()
Return an array of sorted sets (one set per topic). |
LabelAlphabet |
getTopicAlphabet()
|
java.lang.Object[][] |
getTopWords(int numWords)
Return an array (one element for each topic) of arrays of words, which are the most probable words for that topic in descending order. |
static void |
main(java.lang.String[] args)
|
double |
modelLogLikelihood()
|
void |
optimizeAlpha(WorkerRunnable[] runnables)
|
void |
printDocumentTopics(java.io.File file)
|
void |
printDocumentTopics(java.io.PrintWriter out)
|
void |
printDocumentTopics(java.io.PrintWriter out,
double threshold,
int max)
|
void |
printState(java.io.File f)
|
void |
printState(java.io.PrintStream out)
|
void |
printTopicWordWeights(java.io.File file)
|
void |
printTopicWordWeights(java.io.PrintWriter out)
Print an unnormalized weight for every word in every topic. |
void |
printTopWords(java.io.File file,
int numWords,
boolean useNewLines)
|
void |
printTopWords(java.io.PrintStream out,
int numWords,
boolean usingNewLines)
|
void |
printTypeTopicCounts(java.io.File file)
Write the internal representation of type-topic counts (count/topic pairs in descending order by count) to a file. |
static ParallelTopicModel |
read(java.io.File f)
|
void |
setBurninPeriod(int burninPeriod)
|
void |
setNumIterations(int numIterations)
|
void |
setNumThreads(int threads)
|
void |
setOptimizeInterval(int interval)
Interval for optimizing Dirichlet hyperparameters |
void |
setRandomSeed(int seed)
|
void |
setSaveSerializedModel(int interval,
java.lang.String filename)
Define how often and where to save a serialized model. |
void |
setSaveState(int interval,
java.lang.String filename)
Define how often and where to save a text representation of the current state. |
void |
setTopicDisplay(int interval,
int n)
|
void |
sumTypeTopicCounts(WorkerRunnable[] runnables)
|
void |
write(java.io.File serializedModelFile)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected java.util.ArrayList<TopicAssignment> data
protected Alphabet alphabet
protected LabelAlphabet topicAlphabet
protected int numTopics
protected int topicMask
protected int topicBits
protected int numTypes
protected double[] alpha
protected double alphaSum
protected double beta
protected double betaSum
public static final double DEFAULT_BETA
protected int[][] typeTopicCounts
protected int[] tokensPerTopic
protected int[] docLengthCounts
protected int[][] topicDocCounts
public int numIterations
public int burninPeriod
public int saveSampleInterval
public int optimizeInterval
public int showTopicsInterval
public int wordsPerTopic
protected int saveStateInterval
protected java.lang.String stateFilename
protected int saveModelInterval
protected java.lang.String modelFilename
protected int randomSeed
protected java.text.NumberFormat formatter
protected boolean printLogLikelihood
Constructor Detail |
---|
public ParallelTopicModel(int numberOfTopics)
public ParallelTopicModel(int numberOfTopics, double alphaSum, double beta)
public ParallelTopicModel(LabelAlphabet topicAlphabet, double alphaSum, double beta)
Method Detail |
---|
public Alphabet getAlphabet()
public LabelAlphabet getTopicAlphabet()
public int getNumTopics()
public java.util.ArrayList<TopicAssignment> getData()
public void setNumIterations(int numIterations)
public void setBurninPeriod(int burninPeriod)
public void setTopicDisplay(int interval, int n)
public void setRandomSeed(int seed)
public void setOptimizeInterval(int interval)
public void setNumThreads(int threads)
public void setSaveState(int interval, java.lang.String filename)
interval - Save a copy of the state every interval iterations.
filename - Save the state to this file, with the iteration number as a suffix
public void setSaveSerializedModel(int interval, java.lang.String filename)
interval - Save a serialized model every interval iterations.
filename - Save to this file, with the iteration number as a suffix
public void addInstances(InstanceList training)
public void buildInitialTypeTopicCounts()
public void sumTypeTopicCounts(WorkerRunnable[] runnables)
public void optimizeAlpha(WorkerRunnable[] runnables)
public void estimate() throws java.io.IOException
java.io.IOException
public void printTopWords(java.io.File file, int numWords, boolean useNewLines) throws java.io.IOException
java.io.IOException
public java.util.TreeSet[] getSortedWords()
public java.lang.Object[][] getTopWords(int numWords)
numWords - The maximum length of each topic's array of words (may be less).
public void printTopWords(java.io.PrintStream out, int numWords, boolean usingNewLines)
public void printTypeTopicCounts(java.io.File file) throws java.io.IOException
java.io.IOException
public void printTopicWordWeights(java.io.File file) throws java.io.IOException
java.io.IOException
public void printTopicWordWeights(java.io.PrintWriter out) throws java.io.IOException
java.io.IOException
public void printDocumentTopics(java.io.File file) throws java.io.IOException
java.io.IOException
public void printDocumentTopics(java.io.PrintWriter out)
public void printDocumentTopics(java.io.PrintWriter out, double threshold, int max)
out - A print writer
threshold - Only print topics with proportion greater than this number
max - Print no more than this many topics
public void printState(java.io.File f) throws java.io.IOException
java.io.IOException
public void printState(java.io.PrintStream out)
public double modelLogLikelihood()
public TopicInferencer getInferencer()
public void write(java.io.File serializedModelFile)
public static ParallelTopicModel read(java.io.File f) throws java.lang.Exception
java.lang.Exception
public static void main(java.lang.String[] args)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |