org.seasar.robot.transformer.impl
クラス HtmlTransformer
java.lang.Object
org.seasar.robot.transformer.impl.AbstractTransformer
org.seasar.robot.transformer.impl.HtmlTransformer
- すべての実装されたインタフェース:
- Transformer
- 直系の既知のサブクラス:
- FileTransformer, XpathTransformer
public class HtmlTransformer
- extends AbstractTransformer
HtmlTransformer stores web data as HTML content.
- 作成者:
- shinsuke
メソッドの概要 |
protected void |
addChildUrlFromTagAttribute(List<String> urlList,
URL url,
String attrValue,
String encoding)
|
void |
addChildUrlRule(String tagName,
String attrName)
|
void |
addFeature(String key,
String value)
|
void |
addProperty(String key,
String value)
|
protected List<String> |
convertChildUrlList(List<String> urlList)
|
protected File |
createResponseBodyFile(ResponseData responseData)
|
protected String |
encodeUrl(String url,
String enc)
|
protected String |
getBaseHref(Document document)
|
Map<String,String> |
getChildUrlRuleMap()
|
Object |
getData(AccessResultData accessResultData)
Returns the data as a String containing the HTML content. |
String |
getDefaultEncoding()
|
protected org.cyberneko.html.parsers.DOMParser |
getDomParser()
|
protected String |
getDuplicateUrl(String url)
|
Map<String,String> |
getFeatureMap()
|
Pattern |
getInvalidUrlPattern()
|
int |
getPreloadSizeForCharset()
|
Map<String,String> |
getPropertyMap()
|
protected List<String> |
getUrlFromTagAttribute(URL url,
Document document,
String xpath,
String attr,
String encoding)
|
protected org.apache.xpath.CachedXPathAPI |
getXPathAPI()
|
protected boolean |
isHtml(ResponseData responseData)
|
protected boolean |
isSupportedCharset(String charsetName)
|
protected boolean |
isValidPath(String path)
|
protected String |
loadCharset(InputStream inputStream)
|
protected String |
normalizeUrl(String u)
|
protected String |
parseCharset(String content)
|
void |
setChildUrlRuleMap(Map<String,String> childUrlRuleMap)
|
void |
setDefaultEncoding(String defaultEncoding)
|
void |
setFeatureMap(Map<String,String> featureMap)
|
void |
setInvalidUrlPattern(Pattern invalidUrlPattern)
|
void |
setPreloadSizeForCharset(int preloadSizeForCharset)
|
void |
setPropertyMap(Map<String,String> propertyMap)
|
protected void |
storeChildUrls(ResponseData responseData,
ResultData resultData)
|
protected void |
storeData(ResponseData responseData,
ResultData resultData)
|
ResultData |
transform(ResponseData responseData)
|
protected void |
updateCharset(ResponseData responseData)
|
クラス java.lang.Object から継承されたメソッド |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
featureMap
protected Map<String,String> featureMap
propertyMap
protected Map<String,String> propertyMap
childUrlRuleMap
protected Map<String,String> childUrlRuleMap
defaultEncoding
protected String defaultEncoding
preloadSizeForCharset
protected int preloadSizeForCharset
invalidUrlPattern
protected Pattern invalidUrlPattern
HtmlTransformer
public HtmlTransformer()
transform
public ResultData transform(ResponseData responseData)
- 定義:
- インタフェース
Transformer
内の transform
- 定義:
- クラス
AbstractTransformer
内の transform
createResponseBodyFile
protected File createResponseBodyFile(ResponseData responseData)
isHtml
protected boolean isHtml(ResponseData responseData)
addChildUrlRule
public void addChildUrlRule(String tagName,
String attrName)
getXPathAPI
protected org.apache.xpath.CachedXPathAPI getXPathAPI()
storeChildUrls
protected void storeChildUrls(ResponseData responseData,
ResultData resultData)
convertChildUrlList
protected List<String> convertChildUrlList(List<String> urlList)
storeData
protected void storeData(ResponseData responseData,
ResultData resultData)
updateCharset
protected void updateCharset(ResponseData responseData)
isSupportedCharset
protected boolean isSupportedCharset(String charsetName)
loadCharset
protected String loadCharset(InputStream inputStream)
parseCharset
protected String parseCharset(String content)
getDuplicateUrl
protected String getDuplicateUrl(String url)
getDomParser
protected org.cyberneko.html.parsers.DOMParser getDomParser()
getBaseHref
protected String getBaseHref(Document document)
getUrlFromTagAttribute
protected List<String> getUrlFromTagAttribute(URL url,
Document document,
String xpath,
String attr,
String encoding)
addChildUrlFromTagAttribute
protected void addChildUrlFromTagAttribute(List<String> urlList,
URL url,
String attrValue,
String encoding)
encodeUrl
protected String encodeUrl(String url,
String enc)
normalizeUrl
protected String normalizeUrl(String u)
isValidPath
protected boolean isValidPath(String path)
addFeature
public void addFeature(String key,
String value)
addProperty
public void addProperty(String key,
String value)
getData
public Object getData(AccessResultData accessResultData)
- Returns the data as a String containing the HTML content.
getFeatureMap
public Map<String,String> getFeatureMap()
setFeatureMap
public void setFeatureMap(Map<String,String> featureMap)
getPropertyMap
public Map<String,String> getPropertyMap()
setPropertyMap
public void setPropertyMap(Map<String,String> propertyMap)
getChildUrlRuleMap
public Map<String,String> getChildUrlRuleMap()
setChildUrlRuleMap
public void setChildUrlRuleMap(Map<String,String> childUrlRuleMap)
getDefaultEncoding
public String getDefaultEncoding()
setDefaultEncoding
public void setDefaultEncoding(String defaultEncoding)
getPreloadSizeForCharset
public int getPreloadSizeForCharset()
setPreloadSizeForCharset
public void setPreloadSizeForCharset(int preloadSizeForCharset)
getInvalidUrlPattern
public Pattern getInvalidUrlPattern()
setInvalidUrlPattern
public void setInvalidUrlPattern(Pattern invalidUrlPattern)
Copyright © 2009-2010 The Seasar Foundation. All Rights Reserved.