Week 3 – Text Cloud with Concordance
February 10, 2008
This week I wanted to make a text cloud using Java. I used a TreeMap iterator to get each word and its count from the input text, then used the count to determine the font size and gave each word a semi-random position on the stage. I wrote one version using the Processing libraries and one without; the commented-out sections represent the pure Java version.
Here’s the code:
/* Ben Leduc-Mills */
/* Programming from A to Z */
/* Simple Text Concordance and text cloud */
/* Using a Java TreeMap */
/* Based on code by Dan Shiffman */
package concordance.processing;
//import java.awt.Font;
//import java.awt.Graphics;
import java.io.*;
//import java.lang.reflect.Array;
import java.util.*;
import a2z.*;
import java.util.regex.*;
import concordance.treemap.Word;
import processing.core.PApplet;
import processing.core.PFont;
public class blm272_week3 extends PApplet
{
TreeMap words;
PFont f;
public void setup() {
size(1000,1000);
fillConcordance(“obama.txt”);
f = createFont(“Georgia”,16,true);
}
public void draw() {
background(255);
// We’re done, print out contents of Tree!
// We’re done, print out contents of Tree!
System.out.println(“Here are the contents of your tree:”);
Iterator iterator = words.values().iterator();
while (iterator.hasNext()) {
Word word = (Word) iterator.next();
String output = word.getWord();
int values = word.getCount();
if (values < 3) {
values = values *(values+2);
}
//System.out.println(values);
fill(0);
textFont(f,values+5);
text(output, random(100,650), random(100,650));
}
noLoop();
}
/*public void paint (Graphics g) {
System.out.println(“Here are the contents of your tree:”);
Iterator iterator = words.values().iterator();
while (iterator.hasNext()) {
Word word = (Word) iterator.next();
//System.out.println(word.getWord() + ” ” + word.getCount());
String output = ((word.getWord() + ” “.toString()));
//System.out.println(output);
Font font = new Font(“Dialog”, Font.PLAIN, 48);
g.setFont(font);
g.drawString(output, 100, 100);
}
noLoop();
} */
public void fillConcordance(String path) {
try {
A2ZFileReader fr = new A2ZFileReader(path);
String content = fr.getContent();
// Step 2, create an empty Tree
words = new TreeMap();
// Step 3, break input file up into words
// We are doing this with split and a regular expression
String regex = “\\b”;
String tokens[] = content.split(regex);
// We’ll use a regular exrpession to match words with only characters and apostrophes
// Throwing away all the punctuation (we could do this with a different split regex too)
Pattern p = Pattern.compile(“[a-z']+”,Pattern.CASE_INSENSITIVE);
// For every word
for (int i = 0; i < tokens.length; i++)
{
String s = tokens[i].toLowerCase();
// If it matches our regex, insert it in the tree
Matcher m = p.matcher(s);
if (m.matches()) {
if (words.containsKey(s)) {
Word w = (Word) words.get(s);
w.count();
} else {
Word w = new Word(s);
words.put(s,w);
}
}
}
} catch (IOException e) {
System.out.println(“File I/O Error”);
e.printStackTrace();
}
}
}
Here’s a screenshot of the result on an Obama speech:
