Package tidy :: Module lib
[hide private]
[frames | no frames]

Source Code for Module tidy.lib

  1  from __future__ import generators 
  2   
  3  import sys 
  4  import os.path 
  5  from itertools import count 
  6  packagedir = os.path.dirname(__file__) 
  7   
  8  # look for ctypes in the system path, then try looking for a private ctypes 
  9  # distribution 
 10  try: 
 11      import ctypes 
 12  except ImportError: 
 13      private_ctypes = os.path.join(packagedir, 'pvt_ctypes') 
 14      sys.path.insert(0, private_ctypes) 
 15      sys.path.insert(0, os.path.join(private_ctypes, 'ctypes.zip')) 
 16      import ctypes 
 17  from cStringIO import StringIO 
 18  import weakref 
 19  from tidy.error import * 
 20   
 21  # search the path for libtidy using the known names; try the package  
 22  # directory too 
 23  thelib=None 
 24  os.environ['PATH'] = "%s%s%s" % (packagedir, os.pathsep, os.environ['PATH']) 
 25  for libname in ('cygtidy-0-99-0', 'libtidy', 'libtidy.so', 
 26                  'libtidy-0.99.so.0', 'tidylib'): 
 27      try: 
 28          thelib = getattr(ctypes.cdll, libname) 
 29          break 
 30      except OSError: 
 31          pass 
 32  if not thelib: 
 33      raise OSError("Couldn't find libtidy, please make sure it is installed.") 
 34   
class Loader:
    """I am a trivial wrapper that eliminates the need for tidy.tidyFoo,
    so you can just access tidy.Foo
    """
    def __init__(self):
        self.lib = thelib
        # ctypes assumes a foreign function returns a C int unless told
        # otherwise.  tidyCreate returns an opaque document pointer, which
        # would be truncated on platforms where pointers are wider than int.
        # A pointer restype keeps the full value and is passed back to the
        # library as a pointer by the other calls.
        self.lib.tidyCreate.restype = ctypes.POINTER(ctypes.c_void_p)

    def __getattr__(self, name):
        """Return lib.tidy<name> if it exists, otherwise lib.<name>."""
        try:
            return getattr(self.lib, "tidy%s" % name)
        # current ctypes uses ValueError, future will use AttributeError
        except (ValueError, AttributeError):
            return getattr(self.lib, name)
# Module-wide Loader instance; the classes below make their libtidy calls
# through it (e.g. _tidy.Create, _tidy.SaveString).
_tidy = Loader()

# define a callback to pass to Tidylib
def _putByte(handle, c):
    """Forward one output byte to the sink registered under handle.

    The sink is looked up in the module-level sinkfactory mapping and c is
    handed to its putByte method.  Always returns 0.
    """
    sink = sinkfactory[handle]
    sink.putByte(c)
    return 0


# C callback type: returns c_int, takes (c_int, c_char).
PUTBYTEFUNC = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_char)
# The one callback object wrapping _putByte; _Sink.__init__ stores it in
# each sink's struct.
putByte = PUTBYTEFUNC(_putByte)

class _OutputSink(ctypes.Structure):
    """ctypes structure pairing a sink handle with the putByte callback."""
    _fields_ = [
        ("sinkData", ctypes.c_int),  # handle; assigned in SinkFactory.create
        ("putByte", PUTBYTEFUNC),    # callback; assigned in _Sink.__init__
    ]
63
class _Sink:
    """Collects the bytes delivered one at a time via putByte."""
    def __init__(self):
        # struct is what gets handed to the library; it carries the shared
        # module-level putByte callback.
        self.struct = _OutputSink()
        self.struct.putByte = putByte
        self._data = StringIO()

    def putByte(self, c):
        """Append c to the collected output."""
        self._data.write(c)

    def __str__(self):
        """Return everything written so far."""
        return self._data.getvalue()
73
class ReportItem:
    """One diagnostic line, split into its parts.

    Attributes:
      err      -- the raw line exactly as given
      severity -- first character of the severity word ('W', 'E', 'C', ...)
      message  -- the text after the severity word ('' if there is none)
      line     -- line number for "line N column M - ..." reports, else None
      col      -- column number for such reports, else None
    """
    # severity initial -> label used by __str__
    _severities = {'W': 'Warning', 'E': 'Error', 'C': 'Config'}

    def __init__(self, err):
        """Parse err; input that fits neither layout is kept, not rejected."""
        self.err = err
        if not self._parseLocated(err):
            self._parseGeneric(err)
        # TODO - parse emacs mode

    def _parseLocated(self, err):
        """Parse "line N column M - Severity: message"; return True on success."""
        if not err.startswith('line'):
            return False
        tokens = err.split(' ', 6)
        try:
            severity = tokens[5][0]  # W or E
            line = int(tokens[1])
            col = int(tokens[3])
        except (IndexError, ValueError):
            # Text that merely begins with "line" (too few fields, or
            # non-numeric positions) is not a position report.
            return False
        self.severity = severity
        self.line = line
        self.col = col
        # tokens[6:] holds at most one element; empty when no text follows
        self.message = ' '.join(tokens[6:])
        return True

    def _parseGeneric(self, err):
        """Parse "Severity: message" with no position information."""
        tokens = err.split(' ', 1)
        # slicing instead of indexing so an empty line cannot raise
        self.severity = tokens[0][:1]
        if len(tokens) > 1:
            self.message = tokens[1]
        else:
            self.message = ''
        self.line = None
        self.col = None

    def __str__(self):
        """Return the normalized report, or the raw line for unknown severities."""
        try:
            label = self._severities[self.severity]
        except KeyError:
            return self.err
        if self.line:
            return "line %d col %d - %s: %s" % (self.line, self.col,
                                                label, self.message)
        return "%s: %s" % (label, self.message)

    def __repr__(self):
        return "%s('%s')" % (self.__class__.__name__,
                             str(self).replace("'", "\\'"))
106
class FactoryDict(dict):
    """I am a dict with a create method and no __setitem__.  This allows
    me to control my own keys.
    """
    def create(self):
        """Subclasses should implement me to generate a new item"""

    def _setitem(self, name, value):
        """Store value under name, bypassing the blocked __setitem__."""
        dict.__setitem__(self, name, value)

    def __setitem__(self, name, value):
        """Always raise TypeError; items are added through create()."""
        # Call form of raise: the "raise E, msg" statement form is a syntax
        # error on Python 3, while this spelling is accepted by both.
        raise TypeError("Use create() to get a new object")
117 118
class SinkFactory(FactoryDict):
    """Mapping for lookup of sinks by handle"""
    # NOTE(review): nothing visible in this module removes entries, so a
    # sink stays registered after its document is gone -- confirm intended.
    def __init__(self):
        FactoryDict.__init__(self)
        # handle that the next created sink will receive; starts at 0
        self.lastsink = 0

    def create(self):
        """Build a sink, register it under the next handle and return it."""
        sink = _Sink()
        handle = self.lastsink
        # the handle also goes into the struct's sinkData field
        sink.struct.sinkData = handle
        FactoryDict._setitem(self, handle, sink)
        self.lastsink = handle + 1
        return sink

# Module-wide sink registry; _putByte resolves handles through it and
# _Document.__init__ creates its error sink from it.
sinkfactory = SinkFactory()

class _Document(object):
    """A libtidy document handle together with the sink collecting its errors."""
    def __init__(self):
        self.cdoc = _tidy.Create()
        self.errsink = sinkfactory.create()
        _tidy.SetErrorSink(self.cdoc, ctypes.byref(self.errsink.struct))

    def write(self, stream):
        """Write the document text (str(self)) to stream."""
        stream.write(str(self))

    def get_errors(self):
        """Return a ReportItem for every non-blank line in the error sink."""
        stripped = [raw.strip(' \n\r')
                    for raw in str(self.errsink).split('\n')]
        return [ReportItem(text) for text in stripped if text]
    errors = property(get_errors)

    def __str__(self):
        """Return the document as saved by SaveString."""
        size = ctypes.c_int(8192)
        buf = ctypes.c_buffer(size.value)
        status = _tidy.SaveString(self.cdoc, buf, ctypes.byref(size))
        if status == -12:  # buffer too small
            # retry with a buffer of the size now held in `size`
            # (presumably the required length reported by the library)
            buf = ctypes.c_buffer(size.value)
            _tidy.SaveString(self.cdoc, buf, ctypes.byref(size))
        return buf.value

# Leading text of an option-related message -> exception class that
# DocumentFactory._setOptions raises when the latest message starts with it.
errors = {
    'missing or malformed argument for option: ': OptionArgError,
    'unknown option: ': InvalidOptionError,
}


class DocumentFactory(FactoryDict):
    """Creates documents and maps a weak reference to each live document
    onto its libtidy handle, so the handle can be released when the
    document object goes away.
    """
    def _setOptions(self, doc, **options):
        """Apply each keyword option to doc.

        Underscores in option names are sent as dashes and None is sent as
        the empty string.  Raises the exception class registered in the
        module-level errors mapping when the latest reported message starts
        with one of its keys.
        """
        for name, value in options.items():
            # this will flush out most argument type errors...
            if value is None:
                value = ''

            _tidy.OptParseValue(doc.cdoc,
                                name.replace('_', '-'),
                                str(value))
            # doc.errors re-parses the whole error sink on every access, so
            # read it once per option.
            reported = doc.errors
            if reported:
                message = reported[-1].message
                # A list comprehension is indexable on every Python version,
                # unlike the result of filter().
                match = [prefix for prefix in errors.keys()
                         if message.startswith(prefix)]
                if match:
                    raise errors[match[0]](message)

    def load(self, doc, arg, loader):
        """Feed arg to doc through loader, then run CleanAndRepair on it."""
        loader(doc.cdoc, arg)
        _tidy.CleanAndRepair(doc.cdoc)

    def loadFile(self, doc, filename):
        """Load doc from the file named filename."""
        self.load(doc, filename, _tidy.ParseFile)

    def loadString(self, doc, st):
        """Load doc from the string st."""
        self.load(doc, st, _tidy.ParseString)

    def _create(self, *args, **kwargs):
        """Make a new document, apply kwargs as options and register it."""
        doc = _Document()
        self._setOptions(doc, **kwargs)
        # releaseDoc is called with this reference once doc is collected
        ref = weakref.ref(doc, self.releaseDoc)
        FactoryDict._setitem(self, ref, doc.cdoc)
        return doc

    def parse(self, filename, *args, **kwargs):
        """Open and process filename as an HTML file, returning a
        processed document object.
        @param kwargs: named options to pass to TidyLib for processing
        the input file.
        @param filename: the name of a file to process
        @return: a document object
        """
        doc = self._create(**kwargs)
        self.loadFile(doc, filename)
        return doc

    def parseString(self, st, *args, **kwargs):
        """Use st as an HTML file, and process it, returning a
        document object.
        @param kwargs: named options to pass to TidyLib for processing
        the input file.
        @param st: the string to parse
        @return: a document object
        """
        doc = self._create(**kwargs)
        self.loadString(doc, st)
        return doc

    def releaseDoc(self, ref):
        """Weakref callback: release the handle registered under ref."""
        # Remove the entry as well as releasing the handle; otherwise every
        # document ever created leaves a dead reference and a stale handle
        # in this mapping for the life of the process.
        cdoc = self.pop(ref)
        _tidy.Release(cdoc)

# Module-wide document factory, with its two parsing entry points exposed
# as module-level callables.
docfactory = DocumentFactory()
parse = docfactory.parse
parseString = docfactory.parseString
