Difference between revisions of "User:WindBOT/LinkClass"
m |
m (+anchor filter) |
||
(3 intermediate revisions by the same user not shown) | |||
Line 2: | Line 2: | ||
def __init__(self, content): | def __init__(self, content): | ||
content = u(content) | content = u(content) | ||
+ | self.joined = False | ||
self.setBody(content) | self.setBody(content) | ||
self.setType(u'unknown') | self.setType(u'unknown') | ||
+ | self.setLabel(None) | ||
+ | self.setLink(u'') | ||
+ | self.anchor = None | ||
self.joined = False | self.joined = False | ||
if len(content) > 2: | if len(content) > 2: | ||
Line 13: | Line 17: | ||
if lnk.find(u':') == -1: | if lnk.find(u':') == -1: | ||
lnk = lnk.replace(u'_', u' ') | lnk = lnk.replace(u'_', u' ') | ||
+ | anchor = None | ||
+ | if lnk.find(u'#') != -1: | ||
+ | lnk, anchor = lnk.split(u'#', 1) | ||
+ | self.setAnchor(anchor) | ||
self.setLink(lnk) | self.setLink(lnk) | ||
if len(split) == 2: | if len(split) == 2: | ||
Line 18: | Line 26: | ||
else: | else: | ||
self.setLabel(split[0]) | self.setLabel(split[0]) | ||
− | self.joined = | + | self.joined = anchor is None |
elif content[0] == u'[' and content[-1] == u']': | elif content[0] == u'[' and content[-1] == u']': | ||
split = content[1:-1].split(u' ', 1) | split = content[1:-1].split(u' ', 1) | ||
Line 31: | Line 39: | ||
def getBody(self): | def getBody(self): | ||
return u(self.body) | return u(self.body) | ||
− | def getLink(self): | + | def getLink(self, withAnchor=False): |
+ | if withAnchor and self.getAnchor() is not None: | ||
+ | return u(self.link) + u'#' + self.getAnchor() | ||
return u(self.link) | return u(self.link) | ||
+ | def getAnchor(self): | ||
+ | return self.anchor | ||
def getLabel(self): | def getLabel(self): | ||
if self.label is None: | if self.label is None: | ||
Line 44: | Line 56: | ||
self.body = u(body) | self.body = u(body) | ||
def setLink(self, link): | def setLink(self, link): | ||
− | self.link = u(link | + | link = u(link) |
+ | if self.getType() == u'internal' and link.find(u'#') != -1: | ||
+ | link, anchor = link.split(u'#', 1) | ||
+ | self.setAnchor(anchor) | ||
+ | self.link = link | ||
if self.joined: | if self.joined: | ||
self.label = u(link) | self.label = u(link) | ||
+ | replaceDots = compileRegex(r'(?:\.[a-f\d][a-f\d])+') | ||
+ | def _replaceDots(self, g): | ||
+ | s = '' | ||
+ | g = g.group(0) | ||
+ | for i in xrange(0, len(g), 3): | ||
+ | s += chr(int(g[i + 1:i + 3], 16)) | ||
+ | return s.decode('utf8') | ||
+ | def setAnchor(self, anchor): | ||
+ | if self.getType() == u'internal': | ||
+ | u(anchor).replace(u'_', u' ') | ||
+ | try: | ||
+ | anchor = link.replaceDots.sub(self._replaceDots, anchor) | ||
+ | except: | ||
+ | pass | ||
+ | self.anchor = anchor | ||
def setLabel(self, label): | def setLabel(self, label): | ||
if label is None: | if label is None: | ||
Line 57: | Line 88: | ||
return self.__unicode__() | return self.__unicode__() | ||
def __repr__(self): | def __repr__(self): | ||
− | return self. | + | return u'<Link-' + self.getType() + u': ' + self.__unicode__() + u'>' |
def __unicode__(self): | def __unicode__(self): | ||
+ | label = self.getLabel() | ||
+ | tmpLink = self.getLink(withAnchor=True) | ||
if self.getType() == u'internal': | if self.getType() == u'internal': | ||
− | |||
− | |||
tmpLink2 = tmpLink.replace(u'_', u' ') | tmpLink2 = tmpLink.replace(u'_', u' ') | ||
− | if label in (tmpLink2, tmpLink) | + | if label in (tmpLink2, tmpLink) or (label and tmpLink and (label[0].lower() == tmpLink[0].lower() and tmpLink[1:] == label[1:]) or (label[0].lower() == tmpLink2[0].lower() and tmpLink2[1:] == label[1:])): |
− | |||
− | |||
return u'[[' + label + u']]' | return u'[[' + label + u']]' | ||
− | elif tmpLink and label and len(label) > len(tmpLink) and (label.lower().find(tmpLink2.lower()) | + | elif tmpLink and label and len(label) > len(tmpLink) and (label.lower().find(tmpLink2.lower()) == 0 or label.lower().find(tmpLink.lower()) == 0): |
index = max(label.lower().find(tmpLink2.lower()), label.lower().find(tmpLink.lower())) | index = max(label.lower().find(tmpLink2.lower()), label.lower().find(tmpLink.lower())) | ||
− | if label[:index].find( | + | badchars = (u' ', u'_') |
+ | nobadchars = True | ||
+ | for c in badchars: | ||
+ | if label[:index].find(c) != -1 or label[index+len(tmpLink):].find(c) != -1: | ||
+ | nobadchars = False | ||
+ | if nobadchars: | ||
return label[:index] + u(link(u'[[' + tmpLink + u'|' + label[index:index+len(tmpLink)] + u']]')) + label[index+len(tmpLink):] | return label[:index] + u(link(u'[[' + tmpLink + u'|' + label[index:index+len(tmpLink)] + u']]')) + label[index+len(tmpLink):] | ||
return u'[[' + tmpLink + u'|' + label + u']]' | return u'[[' + tmpLink + u'|' + label + u']]' |
Latest revision as of 22:09, 23 March 2012
class link:
    """Parsed representation of a single wiki link.

    A link is classified by its outer brackets:

    * ``[[target]]`` / ``[[target|label]]`` -> type ``u'internal'``
    * ``[target]`` / ``[target label]``     -> type ``u'external'``
    * anything else                         -> type ``u'unknown'``

    For internal links a ``#fragment`` is split off the target and stored
    separately as the anchor.  ``unicode(obj)`` renders the link back to
    wikitext, choosing the shortest equivalent form for internal links.

    NOTE(review): relies on the project helpers ``u`` (appears to coerce its
    argument to a unicode string) and ``compileRegex`` (appears to return a
    compiled pattern object); both are defined outside this class.
    """

    def __init__(self, content):
        """Parse ``content`` (the raw wikitext of one link) into its parts."""
        content = u(content)
        # ``joined`` must exist before any setter runs: setLink()/setLabel()
        # both read it.
        self.joined = False
        self.setBody(content)
        self.setType(u'unknown')
        self.setLabel(None)
        self.setLink(u'')
        self.anchor = None
        if len(content) > 2:
            if content[:2] == u'[[' and content[-2:] == u']]':
                split = content[2:-2].split(u'|')
                if len(split) in (1, 2):
                    self.setType(u'internal')
                    lnk = split[0]
                    # Underscores are only normalised when the target has no
                    # colon in it.
                    if lnk.find(u':') == -1:
                        lnk = lnk.replace(u'_', u' ')
                    anchor = None
                    if lnk.find(u'#') != -1:
                        lnk, anchor = lnk.split(u'#', 1)
                        self.setAnchor(anchor)
                    self.setLink(lnk)
                    if len(split) == 2:
                        self.setLabel(split[1])
                    else:
                        self.setLabel(split[0])
                        # Label and target are one and the same only when no
                        # anchor had to be split off the target.
                        self.joined = anchor is None
            elif content[0] == u'[' and content[-1] == u']':
                split = content[1:-1].split(u' ', 1)
                self.setType(u'external')
                self.setLink(split[0])
                if len(split) == 2:
                    self.setLabel(split[1])
                else:
                    self.setLabel(None)

    def getType(self):
        """Return the link type: u'internal', u'external' or u'unknown'."""
        return u(self.kind)

    def getBody(self):
        """Return the raw text this link was built from."""
        return u(self.body)

    def getLink(self, withAnchor=False):
        """Return the link target.

        With ``withAnchor=True`` and an anchor present, the result is
        ``target#anchor``; otherwise just the target.
        """
        if withAnchor and self.getAnchor() is not None:
            return u(self.link) + u'#' + self.getAnchor()
        return u(self.link)

    def getAnchor(self):
        """Return the anchor (the part after ``#``), or None if there is none."""
        return self.anchor

    def getLabel(self):
        """Return the label, or None if no label is set.

        While the link is "joined" the label mirrors the target, so the
        target (without anchor) is returned instead of the stored label.
        """
        if self.label is None:
            return None
        if self.joined:
            return self.getLink()
        return u(self.label)

    def setType(self, kind):
        """Set the link type."""
        self.kind = u(kind)

    def setBody(self, body):
        """Set the raw text returned by getBody()."""
        self.body = u(body)

    def setLink(self, link):
        """Set the link target.

        For internal links a ``#fragment`` is split off and stored through
        setAnchor().  While joined, the label is updated to match.
        """
        link = u(link)
        if self.getType() == u'internal' and link.find(u'#') != -1:
            link, anchor = link.split(u'#', 1)
            self.setAnchor(anchor)
        self.link = link
        if self.joined:
            self.label = u(link)

    # Matches one or more consecutive ".xx" groups (dot + two hex digits).
    replaceDots = compileRegex(r'(?:\.[a-f\d][a-f\d])+')

    def _replaceDots(self, g):
        """Regex callback: decode a run of ``.xx`` groups as UTF-8 bytes.

        ``g`` is a match of ``replaceDots``.  Each three-character group
        contributes one byte; the byte sequence is decoded as UTF-8.
        Raises UnicodeDecodeError if the bytes are not valid UTF-8.
        """
        run = g.group(0)
        # Each group is '.' followed by two hex digits, hence the stride of 3.
        raw = bytearray(int(run[i + 1:i + 3], 16) for i in range(0, len(run), 3))
        return raw.decode('utf8')

    def setAnchor(self, anchor):
        """Set the anchor.

        For internal links the anchor is normalised first: underscores become
        spaces and ``.xx`` dot-encoded byte runs are decoded.  Decoding is
        best-effort: if it fails, the underscore-normalised anchor is kept.
        """
        if self.getType() == u'internal':
            # Keep the result: str.replace returns a new string.
            anchor = u(anchor).replace(u'_', u' ')
            try:
                anchor = link.replaceDots.sub(self._replaceDots, anchor)
            except (TypeError, ValueError):
                # UnicodeDecodeError is a ValueError: the dot-run was not
                # valid UTF-8, so it was probably literal text.  Leave as is.
                pass
        self.anchor = anchor

    def setLabel(self, label):
        """Set the label (None clears it).

        While joined, the target is updated to match a non-None label.
        """
        if label is None:
            self.label = None
        else:
            self.label = u(label)
            if self.joined:
                self.link = u(label)

    def __str__(self):
        return self.__unicode__()

    def __repr__(self):
        return u'<Link-' + self.getType() + u': ' + self.__unicode__() + u'>'

    def __unicode__(self):
        """Render the link as wikitext.

        Internal links collapse to ``[[label]]`` when the label equals the
        target (ignoring underscores and the case of the first character),
        use the ``[[target]]suffix`` form when the label merely extends the
        target with no spaces or underscores, and otherwise become
        ``[[target|label]]``.  External links render as ``[target]`` or
        ``[target label]``.  Unknown links render as their original body.
        """
        label = self.getLabel()
        tmpLink = self.getLink(withAnchor=True)
        kind = self.getType()
        if kind == u'internal':
            if label is None:
                # No label at all: mirror the external-link behaviour.
                return u'[[' + tmpLink + u']]'
            tmpLink2 = tmpLink.replace(u'_', u' ')
            # Only index [0] once both strings are known to be non-empty.
            differsOnlyInFirstCase = bool(label and tmpLink) and any(
                label[0].lower() == target[0].lower() and target[1:] == label[1:]
                for target in (tmpLink, tmpLink2)
            )
            if label in (tmpLink2, tmpLink) or differsOnlyInFirstCase:
                return u'[[' + label + u']]'
            lowered = label.lower()
            pos2 = lowered.find(tmpLink2.lower())
            pos1 = lowered.find(tmpLink.lower())
            if tmpLink and label and len(label) > len(tmpLink) and (pos2 == 0 or pos1 == 0):
                index = max(pos2, pos1)
                end = index + len(tmpLink)
                before, after = label[:index], label[end:]
                # The short form is only used when the text around the target
                # contains no space or underscore.
                if not any(c in before or c in after for c in (u' ', u'_')):
                    inner = link(u'[[' + tmpLink + u'|' + label[index:end] + u']]')
                    return before + u(inner) + after
            return u'[[' + tmpLink + u'|' + label + u']]'
        if kind == u'external':
            if label is None:
                return u'[' + tmpLink + u']'
            return u'[' + tmpLink + u' ' + label + u']'
        return self.getBody()