1 |
# Follow postgres.log and print fields 1, 16 and 7 of every line containing "exec".
# --line-buffered makes grep flush each matching line immediately; when grep
# writes to a pipe it otherwise block-buffers, so awk would show nothing until
# the buffer fills.
tail -f postgres.log | grep --line-buffered "exec" | awk '{print $1,$16,$7}'
1 |
# Follow the PostgreSQL log and show only lines matching the extended regex "errors".
tail -f /var/log/postgresql.log | grep -E "errors"
1 |
# Follow postgres.log and print fields 1, 16 and 7 of every line containing "exec".
# --line-buffered makes grep flush each matching line immediately; when grep
# writes to a pipe it otherwise block-buffers, so awk would show nothing until
# the buffer fills.
tail -f postgres.log | grep --line-buffered "exec" | awk '{print $1,$16,$7}'
1 |
# Follow the PostgreSQL log and show only lines matching the extended regex "errors".
tail -f /var/log/postgresql.log | grep -E "errors"
1 2 3 |
-- Register the untrusted PL/Perl procedural language.
CREATE LANGUAGE plperlu;

-- sleep(integer): passes its first argument to Perl's sleep() and returns
-- whatever that call returns.
CREATE OR REPLACE FUNCTION sleep(integer)
RETURNS integer
AS $$return sleep(shift)$$
LANGUAGE plperlu;
1 |
# List the open file descriptors of one process.
# Set PROCID to the process id first, e.g.  PROCID=9573
# The ${PROCID:?...} expansion aborts with the given message when PROCID is unset.
ls -l "/proc/${PROCID:?set PROCID to the target process id}/fd"
1 |
# Send the KILL signal to process 9573 (-s <signal> is the long-hand of -KILL / -9).
kill -s KILL 9573
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
KILL(1) User Commands KILL(1) NAME kill - send a signal to a process SYNOPSIS kill [options] <pid> [...] DESCRIPTION The default signal for kill is TERM. Use -l or -L to list available signals. Particularly useful signals include HUP, INT, KILL, STOP, CONT, and 0. Alternate signals may be specified in three ways: -9, -SIGKILL or -KILL. Negative PID values may be used to choose whole process groups; see the PGID column in ps command output. A PID of -1 is special; it indicates all processes except the kill process itself and init. OPTIONS <pid> [...] Send signal to every <pid> listed. -<signal> -s <signal> --signal <signal> Specify the signal to be sent. The signal can be specified by using name or number. The behavior of signals is explained in signal(7) manual page. -l, --list [signal] List signal names. This option has optional argument, which will convert signal number to signal name, or other way round. -L, --table List signal names in a nice table. NOTES Your shell (command line interpreter) may have a built-in kill command. You may need to run the command described here as /bin/kill to solve the conflict. EXAMPLES kill -9 -1 Kill all processes you can kill. kill -l 11 Translate number 11 into a signal name. kill -L List the available signal choices in a nice table. kill 123 543 2341 3453 Send the default signal, SIGTERM, to all those processes. SEE ALSO kill(2), killall(1), nice(1), pkill(1), renice(1), signal(7), skill(1) STANDARDS This command meets appropriate standards. The -L flag is Linux-specific. AUTHOR Albert Cahalan ⟨albert@users.sf.net⟩ wrote kill in 1999 to replace a bsdutils one that was not standards compliant. The util-linux one might also work correctly. REPORTING BUGS Please send bug reports to ⟨procps@freelists.org⟩ |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
# files containing "import" in subdirectories
grep -rHn "import" *

# files containing "import" or "except"
grep -rHnE "import|except" *

# only .py files containing "import" or "except"
grep --color --include="*.py" -rHnE "import|except" *

# python files, text files and java files
# (one --include per extension: "*[py|txt|java]" is a bracket expression that
#  matches any name ending in one of those single characters; [[:space:]] is
#  the POSIX class, "\s" inside brackets is not)
grep --color --include="*.py" --include="*.txt" --include="*.java" -rHnE "^import [a-z,[:space:]]+$" *

# same extensions selected with find (dots escaped, regular files only)
find . -type f -regextype posix-extended -iregex ".*\.(py|txt|java)" -exec grep --color -HnE "import|except" '{}' \; -print
#find . -name "*.py" -exec grep --color -rHnE "import|except" '{}' \; -print

# print file names only
find . -type f -regextype posix-extended -iregex ".*\.(py|txt|java)" -exec grep -li "kch#opts=c#zcomp=lzo#c=true" '{}' \;

# egrep <==> grep -E (extended regular expressions)
grep -rHnE 'recursive|fun|also' *
1 |
# Compress archivefile1.txt in place (--compress is bzip2's default action, spelled out).
bzip2 --compress archivefile1.txt
1 2 3 4 5 |
# Set the system clock from a MM/DD/YYYY date and 24-hour time.
sudo date -s "01/18/2013 17:18:00"
# Fri Jan 18 17:18:00 CET 2013

# Template only (placeholders, not runnable as-is): date, time and UTC offset.
# sudo date -s "YYYY-mm-DD HH:MM:SS±hh:mm"
1 |
# Set the system clock from a "date time±offset" string (--set is the long form of -s).
sudo date --set="2013-01-16 13:57:56-06:00"
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
> man date DATE(1) User Commands DATE(1) NAME date - print or set the system date and time SYNOPSIS date [OPTION]... [+FORMAT] date [-u|--utc|--universal] [MMDDhhmm[[CC]YY][.ss]] DESCRIPTION Display the current time in the given FORMAT, or set the system date. -d, --date=STRING display time described by STRING, not `now' -f, --file=DATEFILE like --date once for each line of DATEFILE -r, --reference=FILE display the last modification time of FILE -R, --rfc-2822 output date and time in RFC 2822 format. Example: Mon, 07 Aug 2006 12:34:56 -0600 --rfc-3339=TIMESPEC output date and time in RFC 3339 format. TIMESPEC=`date', `seconds', or `ns' for date and time to the indicated precision. Date and time components are separated by a single space: 2006-08-07 12:34:56-06:00 -s, --set=STRING set time described by STRING -u, --utc, --universal print or set Coordinated Universal Time --help display this help and exit --version output version information and exit FORMAT controls the output. Interpreted sequences are: %% a literal % %a locale's abbreviated weekday name (e.g., Sun) %A locale's full weekday name (e.g., Sunday) %b locale's abbreviated month name (e.g., Jan) %B locale's full month name (e.g., January) %c locale's date and time (e.g., Thu Mar 3 23:05:25 2005) %C century; like %Y, except omit last two digits (e.g., 20) %d day of month (e.g., 01) %D date; same as %m/%d/%y %e day of month, space padded; same as %_d %F full date; same as %Y-%m-%d %g last two digits of year of ISO week number (see %G) %G year of ISO week number (see %V); normally useful only with %V %h same as %b %H hour (00..23) %I hour (01..12) %j day of year (001..366) %k hour, space padded ( 0..23); same as %_H %l hour, space padded ( 1..12); same as %_I %m month (01..12) %M minute (00..59) %n a newline %N nanoseconds (000000000..999999999) %p locale's equivalent of either AM or PM; blank if not known %P like %p, but lower case %r locale's 12-hour clock time (e.g., 11:11:04 PM) %R 24-hour hour and 
minute; same as %H:%M %s seconds since 1970-01-01 00:00:00 UTC %S second (00..60) %t a tab %T time; same as %H:%M:%S %u day of week (1..7); 1 is Monday %U week number of year, with Sunday as first day of week (00..53) %V ISO week number, with Monday as first day of week (01..53) %w day of week (0..6); 0 is Sunday %W week number of year, with Monday as first day of week (00..53) %x locale's date representation (e.g., 12/31/99) %X locale's time representation (e.g., 23:13:48) %y last two digits of year (00..99) %Y year %z +hhmm numeric time zone (e.g., -0400) %:z +hh:mm numeric time zone (e.g., -04:00) %::z +hh:mm:ss numeric time zone (e.g., -04:00:00) %:::z numeric time zone with : to necessary precision (e.g., -04, +05:30) %Z alphabetic time zone abbreviation (e.g., EDT) By default, date pads numeric fields with zeroes. The following optional flags may follow `%': - (hyphen) do not pad the field _ (underscore) pad with spaces 0 (zero) pad with zeros ^ use upper case if possible # use opposite case if possible After any flags comes an optional field width, as a decimal number; then an optional modifier, which is either E to use the locale's alternate representations if available, or O to use the locale's alternate numeric symbols if available. EXAMPLES Convert seconds since the epoch (1970-01-01 UTC) to a date $ date --date='@2147483647' Show the time on the west coast of the US (use tzselect(1) to find TZ) $ TZ='America/Los_Angeles' date Show the local time for 9AM next Friday on the west coast of the US $ date --date='TZ="America/Los_Angeles" 09:00 next Fri' DATE STRING The --date=STRING is a mostly free format human readable date string such as "Sun, 29 Feb 2004 16:21:42 -0800" or "2004-02-29 16:21:42" or even "next Thursday". A date string may contain items indicating calendar date, time of day, time zone, day of week, relative time, relative date, and numbers. An empty string indicates the beginning of the day. 
The date string format is more complex than is easily documented here but is fully described in the info documentation. AUTHOR Written by David MacKenzie. REPORTING BUGS Report date bugs to bug-coreutils@gnu.org GNU coreutils home page: <http://www.gnu.org/software/coreutils/> General help using GNU software: <http://www.gnu.org/gethelp/> Report date translation bugs to <http://translationproject.org/team/> COPYRIGHT Copyright © 2011 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>. This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. SEE ALSO The full documentation for date is maintained as a Texinfo manual. If the info and date programs are properly installed at your site, the command info coreutils 'date invocation' should give you access to the complete manual. GNU coreutils 8.12.197-032bb September 2011 DATE(1) |
1 |
# Install pgbouncer without an interactive confirmation prompt (--yes is the long form of -y).
sudo apt-get install --yes pgbouncer
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# -*- coding: utf-8 -*- from __future__ import print_function import time import sys from time import strftime import inspect import traceback # http://www.siafoo.net/article/68 # being nice to subsequent decorator calls or chains, otherwise losing the name of the function def timing(result=True, params=True, start=False): def timing_decorator(f): def wrapper(*args, **kw): t1 = time.time() function_result = None try: function_result = f(*args, **kw) return function_result finally: t2 = time.time() from time import strftime strx = strftime("%Y-%m-%d %H:%M:%S") if params and result: if args and kw: strx +=' - % 8.f ms - %s(%r, %r) => %s' % ((t2-t1)*1000.0, f.func_name, str(args), str(kw), str(nprint(function_result))) elif args and not kw: strx +=' - % 8.f ms - %s(%r) => %s' % ((t2-t1)*1000.0, f.func_name, str(args), str(nprint(function_result))) elif not args and kw: strx +=' - % 8.f ms - %s(%r) => %s' % ((t2-t1)*1000.0, f.func_name, str(kw), str(nprint(function_result))) else: strx +=' - % 8.f ms - %s() => %s' % ((t2-t1)*1000.0, f.func_name, str(nprint(function_result))) elif params and not result: if args and kw: strx +=' - % 8.f ms - %s(%r, %r)' % ((t2-t1)*1000.0, f.func_name, str(args), str(kw)) elif args and not kw: strx +=' - % 8.f ms - %s(%r)' % ((t2-t1)*1000.0, f.func_name, str(args)) elif not args and kw: strx +=' - % 8.f ms - %s(%r)' % ((t2-t1)*1000.0, f.func_name, str(kw)) else: strx +=' - % 8.f ms - %s()' % ((t2-t1)*1000.0, f.func_name) elif not params and result: strx +=' - % 8.f ms - %s() => %s' % ((t2-t1)*1000.0, f.func_name, str(nprint(function_result))) else: strx +=' - % 8.f ms - %s()' % ((t2-t1)*1000.0, f.func_name ) if start: d = datetime.fromtimestamp(t1) strx += " start:"+d.strftime("%Y-%m-%d %H:%M:%S") print(strx) wrapper.func_name = f.func_name return wrapper return timing_decorator def nprint(element): from pprint import pprint import sys, StringIO out = StringIO.StringIO() pprint(element,out) return str(out.getvalue()) @timing() def 
slow_method(): from time import sleep sleep(1) print("Hi I'm a slow method") def main(): slow_method() if __name__ == "__main__": main() |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# -*- coding: utf-8 -*-
"""
Source: http://stackoverflow.com/questions/1789254/clustering-text-in-python

See also:
http://docs.scipy.org/doc/scipy/reference/cluster.html
http://hackmap.blogspot.com/2007/09/k-means-clustering-in-scipy.html
"""
import sys
from math import log, sqrt
from itertools import combinations


def cosine_distance(a, b):
    """Return the dot product of the "tfidf" vectors of documents a and b.

    Note: despite the name, a larger value means the documents share more
    weighted tokens; it is used as an edge weight in the graph.
    """
    a_tfidf = a["tfidf"]
    return sum((tfidf * a_tfidf[token]
                for token, tfidf in b["tfidf"].items()
                if token in a_tfidf), 0.0)


def normalize(features):
    """Scale the values of ``features`` in place to unit Euclidean length.

    An empty or all-zero mapping is returned unchanged (the original divided
    by zero here, e.g. for a document without tokens).
    """
    length = sqrt(sum(v ** 2 for v in features.values()))
    if length == 0:
        return features
    norm = 1.0 / length
    for k, v in features.items():
        features[k] = v * norm
    return features


def add_tfidf_to(documents):
    """Attach a normalized ``"tfidf"`` dict to every document dict in place.

    Each document may carry a ``"tokens"`` list; a missing list counts as empty.
    """
    tokens = {}  # token -> [(document index, term frequency), ...]
    for doc_id, doc in enumerate(documents):
        tf = {}
        doc["tfidf"] = {}
        doc_tokens = doc.get("tokens", [])
        for token in doc_tokens:
            tf[token] = tf.get(token, 0) + 1
        num_tokens = len(doc_tokens)
        if num_tokens > 0:
            for token, freq in tf.items():
                tokens.setdefault(token, []).append((doc_id, float(freq) / num_tokens))

    doc_count = float(len(documents))
    for token, docs in tokens.items():
        idf = log(doc_count / len(docs))
        for doc_id, tf in docs:
            tfidf = tf * idf
            if tfidf > 0:  # tokens present in every document carry no weight
                documents[doc_id]["tfidf"][token] = tfidf

    for doc in documents:
        doc["tfidf"] = normalize(doc["tfidf"])


def choose_cluster(node, cluster_lookup, edges):
    """Return the cluster with the largest summed edge weight among node's neighbours.

    Falls back to the node's current cluster when it has no edges.
    """
    new = cluster_lookup[node]
    if node in edges:
        seen, num_seen = {}, {}
        for target, weight in edges.get(node, []):
            seen[cluster_lookup[target]] = seen.get(
                cluster_lookup[target], 0.0) + weight
        # Invert to weight -> [clusters] so the heaviest weight can be looked up.
        for k, v in seen.items():
            num_seen.setdefault(v, []).append(k)
        new = num_seen[max(num_seen)][0]
    return new


def majorclust(graph):
    """Cluster ``graph.nodes`` and return a list of node lists, one per cluster."""
    cluster_lookup = dict((node, i) for i, node in enumerate(graph.nodes))

    movements = set()  # moves already made; repeating one would loop forever
    finished = False
    while not finished:
        finished = True
        for node in graph.nodes:
            new = choose_cluster(node, cluster_lookup, graph.edges)
            move = (node, cluster_lookup[node], new)
            if new != cluster_lookup[node] and move not in movements:
                movements.add(move)
                cluster_lookup[node] = new
                finished = False

    clusters = {}
    for k, v in cluster_lookup.items():
        clusters.setdefault(v, []).append(k)

    return list(clusters.values())


def get_distance_graph(documents):
    """Build a complete graph over document indices, weighted by cosine_distance."""
    class Graph(object):
        def __init__(self):
            self.edges = {}

        def add_edge(self, n1, n2, w):
            self.edges.setdefault(n1, []).append((n2, w))
            self.edges.setdefault(n2, []).append((n1, w))

    graph = Graph()
    doc_ids = range(len(documents))
    graph.nodes = set(doc_ids)
    for a, b in combinations(doc_ids, 2):
        graph.add_edge(a, b, cosine_distance(documents[a], documents[b]))
    return graph


def get_documents():
    """Return the sample documents as dicts with "text" and "tokens" keys."""
    textlist = """
foo blub baz
foo bar baz
asdf bsdf csdf
foo bab blub
csdf hddf kjtz
123 456 890
321 890 456 foo
123 890 uiop
"""
    texts = [t.strip() for t in textlist.split("\n") if t and t.strip()]
    return [{"text": text, "tokens": text.split()} for text in texts]


def main(args):
    documents = get_documents()
    add_tfidf_to(documents)
    dist_graph = get_distance_graph(documents)

    for cluster in majorclust(dist_graph):
        print("=" * 20)
        for doc_id in cluster:
            print(documents[doc_id]["text"])
    print("done")


if __name__ == '__main__':
    main(sys.argv)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
"""
bktree.py, by bearophile

Fast Levenshtein distance and BK-tree implementations in Python.
The following functions are designed for Psyco, they are too slow without it.
"""


def editDistance(s1, s2):
    """Computes the Levenshtein distance between two arrays (strings too).
    Such distance is the minimum number of operations needed to transform one array into
    the other, where an operation is an insertion, deletion, or substitution of a single
    item (like a char). This implementation (Wagner-Fischer algorithm with just 2 lines)
    keeps two rows whose length is that of the longer input plus one.

    >>> editDistance([], [])
    0
    >>> editDistance([1, 2, 3], [2, 3, 5])
    2
    >>> tests = [["", ""], ["a", ""], ["", "a"], ["a", "a"], ["x", "a"],
    ...          ["aa", ""], ["", "aa"], ["aa", "aa"], ["ax", "aa"], ["a", "aa"], ["aa", "a"],
    ...          ["abcdef", ""], ["", "abcdef"], ["abcdef", "abcdef"],
    ...          ["vintner", "writers"], ["vintners", "writers"]];
    >>> [editDistance(s1, s2) for s1,s2 in tests]
    [0, 1, 1, 0, 1, 2, 2, 0, 1, 1, 1, 6, 6, 0, 5, 4]
    """
    if s1 == s2:
        return 0  # this is fast in Python
    if len(s1) > len(s2):
        s1, s2 = s2, s1
    # list(...) so the row stays assignable after the swap on Python 3 as well
    r1 = list(range(len(s2) + 1))
    r2 = [0] * len(r1)
    for i, c1 in enumerate(s1):
        r2[0] = i + 1
        for j, c2 in enumerate(s2):
            if c1 == c2:
                r2[j+1] = r1[j]
            else:
                r2[j+1] = 1 + min(r2[j], r1[j], r1[j+1])
        r1, r2 = r2, r1
    return r1[-1]


def editDistanceFast(s1, s2, r1=[0]*35, r2=[0]*35):
    """Computes the Levenshtein distance between two arrays (strings too).
    Such distance is the minimum number of operations needed to transform one array into
    the other, where an operation is an insertion, deletion, or substitution of a single
    item (like a char). This implementation (Wagner-Fischer algorithm with just 2 lines)
    reuses two preallocated row buffers.
    This version is a bit faster but it works only with strings up to 34 items long.

    The default r1/r2 lists are deliberately shared between calls as scratch
    space; the part that is used is re-initialised on every call.

    >>> editDistanceFast([], [])
    0
    >>> editDistanceFast([1, 2, 3], [2, 3, 5])
    2
    >>> tests = [["", ""], ["a", ""], ["", "a"], ["a", "a"], ["x", "a"],
    ...          ["aa", ""], ["", "aa"], ["aa", "aa"], ["ax", "aa"], ["a", "aa"], ["aa", "a"],
    ...          ["abcdef", ""], ["", "abcdef"], ["abcdef", "abcdef"],
    ...          ["vintner", "writers"], ["vintners", "writers"]];
    >>> [editDistanceFast(s1, s2) for s1,s2 in tests]
    [0, 1, 1, 0, 1, 2, 2, 0, 1, 1, 1, 6, 6, 0, 5, 4]
    """
    if s1 == s2:
        return 0  # this is fast in Python
    if len(s1) > len(s2):
        s1, s2 = s2, s1
    len_s2 = len(s2)
    assert len(s2) <= 34, "Error: one input sequence is too much long (> 34), use editDistance()."
    for i in range(len_s2 + 1):
        r1[i] = i
        r2[i] = 0
    for i, c1 in enumerate(s1):
        r2[0] = i + 1
        for j, c2 in enumerate(s2):
            if c1 == c2:
                r2[j+1] = r1[j]
            else:
                r2[j+1] = 1 + min(r2[j], r1[j], r1[j+1])
        r1, r2 = r2, r1
    return r1[len_s2]


import gc
try:
    import psyco
    psyco.bind(editDistance)
    psyco.bind(editDistanceFast)
    from psyco.classes import psyobj
except ImportError:
    psyobj = object


class BKtree(psyobj):
    """
    BKtree(items, distance, usegc=False): inputs are an iterable of hashable items that
    must allow the next() method too, and a callable that computes the distance (that
    meets the positivity, symmetry and triangle inequality conditions) between two items.
    It allows a fast search of similar items. The indexing phase may be slow,
    so this is useful only if you want to perform many searches.
    It raises an AttributeError if items has neither a .next() nor a .__next__() method.
    It can be used with strings, using editDistance()/editDistanceFast()

    Once initialized, you can retrieve items using xfind/find, giving an item and a
    threshold distance.
    You can disable the GC during the indexing phase to speed it up (default disabled),
    enabling it you may save some memory.
    If you have Psyco you can use it to speed up editDistanceFast. You can speed up this
    class with (but not binding it with Psyco):
    from psyco.classes import __metaclass__
    You can also use the psyco metaclass just for this BKtree class, with psyobj.

    >>> t = BKtree([], distance=editDistanceFast)
    Traceback (most recent call last):
      ...
    AttributeError: 'list' object has no attribute 'next'

    >>> t = BKtree(iter([]), distance=editDistanceFast)
    >>> t.find("hello", 1), t.find("", 0)
    ([], [])

    >>> ws = "abyss almond clump cubic cuba adopt abused chronic abutted cube clown admix almsman"
    >>> t = BKtree(iter(ws.split()), distance=editDistanceFast)
    >>> [len(t.find("cuba", th)) for th in range(7)]
    [1, 2, 3, 4, 5, 9, 13]
    >>> [t.find("cuba", th) for th in range(4)]
    [['cuba'], ['cuba', 'cube'], ['cubic', 'cuba', 'cube'], ['clump', 'cubic', 'cuba', 'cube']]
    >>> [len(t.find("abyss", th)) for th in range(7)]
    [1, 1, 1, 2, 4, 12, 12]
    >>> [t.find("abyss", th) for th in range(4)]
    [['abyss'], ['abyss'], ['abyss'], ['abyss', 'abused']]
    """
    def __init__(self, items, distance, usegc=False):
        self.distance = distance
        self.nodes = {}
        # Python 3 iterators expose __next__, Python 2 ones expose next; a
        # non-iterator (e.g. a list) still fails with AttributeError on 'next'.
        advance = getattr(items, "__next__", None) or items.next
        try:
            self.root = advance()
        except StopIteration:
            self.root = ""
            return

        self.nodes[self.root] = []  # the value is a list of tuples (word, distance)
        gc_on = gc.isenabled()
        if not usegc:
            gc.disable()
        try:
            for el in items:
                if el not in self.nodes:  # do not add duplicates
                    self._addLeaf(self.root, el)
        finally:
            # Restore the collector even if distance() or the iterator raises.
            if gc_on:
                gc.enable()

    def _addLeaf(self, root, item):
        dist = self.distance(root, item)
        if dist > 0:
            for arc in self.nodes[root]:
                if dist == arc[1]:
                    # An arc with this distance exists: descend into that child.
                    self._addLeaf(arc[0], item)
                    break
            else:
                if item not in self.nodes:
                    self.nodes[item] = []
                self.nodes[root].append((item, dist))

    def find(self, item, threshold):
        "Return an array with all the items found with distance <= threshold from item."
        result = []
        if self.nodes:
            self._finder(self.root, item, threshold, result)
        return result

    def _finder(self, root, item, threshold, result):
        dist = self.distance(root, item)
        if dist <= threshold:
            result.append(root)
        dmin = dist - threshold
        dmax = dist + threshold
        for arc in self.nodes[root]:
            if dmin <= arc[1] <= dmax:
                self._finder(arc[0], item, threshold, result)

    def xfind(self, item, threshold):
        "Like find, but yields items lazily. This is slower than find if you need a list."
        if self.nodes:
            return self._xfinder(self.root, item, threshold)
        # Empty tree: hand back an empty iterator so callers can always iterate.
        return iter(())

    def _xfinder(self, root, item, threshold):
        dist = self.distance(root, item)
        if dist <= threshold:
            yield root
        dmin = dist - threshold
        dmax = dist + threshold
        for arc in self.nodes[root]:
            if dmin <= arc[1] <= dmax:
                for node in self._xfinder(arc[0], item, threshold):
                    yield node


if __name__ == "__main__":
    import doctest
    doctest.testmod()
    print("Tests finished.")

    # You need a list of words
    #words = file("somewordlist.txt").read().split()

    words = iter("""periclean germs progressed laughing allying wasting
    harassing nonsynchronous grumbled ledgers schelling shod mutating
    statewide schuman following reddened nairobi cultivate malted
    overpowering mechanic paraphrase lucerne plugged wick complimented
    roarer supercomputer impromptu cormorant abandons equalized channing
    chill bacon nonnumerical cabana amazing rheumatism""".split())

    tree = BKtree(words, editDistanceFast)
    print(tree.find("cube", 4))  # ['cabana', 'wick', 'chill', 'shod']
    for thresh in range(12):
        print("%d %d" % (thresh, len(tree.find("cube", thresh))))
1 2 3 4 5 6 7 8 9 10 11 12 |
def property(function):
    """Decorator that builds a property from a function defining fget/fset/fdel locally.

    ``function`` is called once with no arguments while a trace function is
    installed; when its frame returns, its local names ``fget``, ``fset`` and
    ``fdel`` (missing ones become None) are collected and, together with the
    function's docstring, handed to the builtin ``property``.
    """
    import sys
    try:
        import builtins  # Python 3
    except ImportError:
        import __builtin__ as builtins  # Python 2

    keys = 'fget', 'fset', 'fdel'
    func_locals = {'doc': function.__doc__}
    target_code = function.__code__

    def probe_func(frame, event, arg):
        # Ignore frames of anything the decorated function itself calls.
        if frame.f_code is not target_code:
            return None
        if event == 'return':
            frame_locals = frame.f_locals
            func_locals.update((k, frame_locals.get(k)) for k in keys)
        return probe_func

    previous_trace = sys.gettrace()
    sys.settrace(probe_func)
    try:
        function()
    finally:
        # Put back whatever tracer (debugger, coverage) was active before.
        sys.settrace(previous_trace)
    # This function shadows the builtin name, so the real property type must be
    # reached through the builtins module; calling ``property`` here would
    # call this decorator again.
    return builtins.property(**func_locals)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
from math import radians, degrees, pi


class Angle(object):
    """An angle stored in radians, readable and writable in radians or degrees.

    The accessors are written for the function-style ``property`` decorator
    defined in this file: each decorated function only defines local
    ``fget``/``fset`` functions, whose names must stay exactly as they are.
    """

    def __init__(self, rad):
        self._rad = rad

    @property
    def rad():
        '''The angle in radians'''
        def fget(self):
            return self._rad

        def fset(self, angle):
            # Another Angle contributes its radian value; anything else is used directly.
            self._rad = float(angle.rad if isinstance(angle, Angle) else angle)

    @property
    def deg():
        '''The angle in degrees'''
        def fget(self):
            return degrees(self._rad)

        def fset(self, angle):
            # Another Angle contributes its degree value; anything else is used directly.
            self._rad = radians(angle.deg if isinstance(angle, Angle) else angle)
1 2 3 4 5 |
def re_raise_exception(new_exc, exc_info=None):
    """Raise ``new_exc`` with the traceback of another exception.

    new_exc  -- the exception instance to raise.
    exc_info -- a ``(type, value, traceback)`` triple; when omitted or empty,
                ``sys.exc_info()`` of the exception currently being handled is used.
    """
    import sys

    if not exc_info:
        exc_info = sys.exc_info()
    _exc_class, _exc, tb = exc_info
    if sys.version_info[0] >= 3:
        raise new_exc.with_traceback(tb)
    # Python 2 only: the three-argument raise is a syntax error on Python 3,
    # so it is kept inside a string that only Python 2 ever executes.
    exec("raise new_exc.__class__, new_exc, tb")
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
import functools
import time


def retry(ExceptionToCheck, tries=4, delay=3, backoff=2):
    """Decorator factory: call the wrapped function up to ``tries`` times.

    ExceptionToCheck -- exception class (or tuple of classes) that triggers a retry.
    tries            -- total number of attempts; must be at least 1.
    delay            -- seconds to wait before the second attempt.
    backoff          -- factor the wait is multiplied by after every retry.

    The exception of the last attempt is re-raised as is; no wait happens
    after that final failure. Raises ValueError when ``tries`` is below 1.
    """
    if tries < 1:
        raise ValueError("tries must be at least 1")

    def deco_retry(f):
        @functools.wraps(f)
        def f_retry(*args, **kwargs):
            mdelay = delay
            for attempt in range(1, tries + 1):
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck as e:
                    if attempt == tries:
                        raise  # out of attempts: propagate with the original traceback
                    print("%s, Retrying in %d seconds..." % (str(e), mdelay))
                    time.sleep(mdelay)
                    mdelay *= backoff
        return f_retry  # true decorator
    return deco_retry
1 2 3 4 5 |
# Usage example: every attempt fails, so the call retries and finally
# re-raises the Exception.
@retry(Exception, tries=4)
def test_fail(text):
    raise Exception("Fail")


test_fail("it works!")
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
import functools


def singleton(cls):
    """ Use class as singleton.

    The first instantiation creates the instance with the class's original
    ``__new__``, runs the original ``__init__`` once and stores the instance
    in ``cls.__it__``; every later instantiation returns that same object
    without initialising it again.
    """

    cls.__new_original__ = cls.__new__

    @functools.wraps(cls.__new__)
    def singleton_new(cls, *args, **kw):
        it = cls.__dict__.get('__it__')
        # Compare against None: an instance that evaluates as false
        # (e.g. defines __len__ returning 0) must still be reused.
        if it is not None:
            return it

        if cls.__new_original__ is object.__new__:
            # object.__new__ rejects constructor arguments once __new__ is
            # overridden, so they are only forwarded to __init__ below.
            it = object.__new__(cls)
        else:
            it = cls.__new_original__(cls, *args, **kw)
        cls.__it__ = it
        it.__init_original__(*args, **kw)
        return it

    cls.__new__ = singleton_new
    cls.__init_original__ = cls.__init__
    # The real initialiser runs exactly once from singleton_new; the class-level
    # __init__ that Python calls on every instantiation becomes a no-op.
    cls.__init__ = object.__init__

    return cls
1 2 3 4 5 6 7 8 9 10 11 12 13 |
# Usage example for the singleton decorator.
@singleton
class Foo:
    def __new__(cls):
        # Class-level default, set before __init__ runs.
        cls.x = 10
        return object.__new__(cls)

    def __init__(self):
        # __new__ has already run, so the class attribute is visible here.
        assert self.x == 10
        self.x = 15

# Every Foo() call is expected to yield the same object, so a value assigned
# through one call is visible through the next.
assert Foo().x == 15
Foo().x = 20
assert Foo().x == 20
1 2 3 4 5 6 7 |
# download http://qt.nokia.com/downloads/sdk-linux-x11-32bit-cpp-offline
# The URL is quoted so the shell never treats '?' as a glob character, and
# -O stores the download under the name the following commands expect.
wget -O QtSdk-offline-linux-x86-v1.2.1.run \
    "http://www.developer.nokia.com/dp?uri=http%3A%2F%2Fsw.nokia.com%2Fid%2F8ea74da4-fec1-4277-8b26-c58cc82e204b%2FQt_SDK_Lin32_offline"
chmod u+x ./QtSdk-offline-linux-x86-v1.2.1.run
sudo ./QtSdk-offline-linux-x86-v1.2.1.run

# install Qt4 Library
sudo apt-get install -y python-lxml qt4-qmake
1 2 3 4 5 6 7 8 9 10 11 12 13 |
# install cssselect - fix "ImportError: No module named cssselect"
sudo pip install cssselect

# install webkit-server
# The install runs in a subshell so the working directory is unchanged
# afterwards and the next clone does not end up inside this checkout.
git clone https://github.com/niklasb/webkit-server.git webkit-server
(cd webkit-server && sudo python setup.py install)

# install dryscrape
# sudo pip install dryscrape
git clone https://github.com/niklasb/dryscrape.git dryscrape
(cd dryscrape && sudo python setup.py install)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# -*- coding: utf-8 -*-
"""
Download pairs of images and log several similarity measures for each pair.

Installation of needed libraries
    sudo apt-get install -y python-pip
    sudo pip install PIL numpy
"""
import errno
import logging
import math
import ntpath
import os
import re
import time

try:  # Python 3
    from urllib.request import urlretrieve
except ImportError:  # Python 2
    from urllib import urlretrieve

from PIL import Image

# More verbose alternative:
# '%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(funcName)s() - %(message)s'
LOG_FORMAT = '%(asctime)s - %(filename)s:%(lineno)s - %(message)s'
logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT)
logger = logging.getLogger(__name__)

# Image.ANTIALIAS is missing from recent Pillow releases, Image.LANCZOS from
# very old PIL releases; use whichever name this installation provides.
_RESAMPLE_FILTER = getattr(Image, 'LANCZOS', None)
if _RESAMPLE_FILTER is None:
    _RESAMPLE_FILTER = Image.ANTIALIAS


def main():
    """Run the comparison on two hard-coded sample URL pairs."""
    download_base_directory = '/tmp/imagesimilarity/'
    urls_image_pairs = """
    http://www.linuxfestnorthwest.org/sites/default/files/sponsors/elephant.png
    http://www.linuxfestnorthwest.org/sites/default/files/sponsors/elephant.png
    #######
    http://www.linuxfestnorthwest.org/sites/default/files/sponsors/elephant.png
    http://terminaltwister.com/wp-content/uploads/2013/09/220px-Postgresql_elephant.svg_.png
    #######
    """
    begin_similarty_compare(urls_image_pairs, download_base_directory)


def begin_similarty_compare(url_texts, download_base_directory):
    """Download every URL group and log all similarity measures for it.

    Args:
        url_texts: whitespace-separated URLs; groups are separated by one or
            more '#' characters. The first two URLs of a group are compared.
        download_base_directory: directory the images are stored in. Files
            that already exist there are not downloaded again.
    """
    logger.debug("image file location: %s" % (download_base_directory))
    comparisons = (
        image_similarity_bands_via_numpy,
        image_similarity_histogram_via_pil,
        image_similarity_vectors_via_numpy,
        image_similarity_greyscale_hash_code,
    )
    idx = 0  # counter for downloaded image names
    for url_text in re.split('#+', url_texts):
        pair = url_text.strip().split()
        if not pair:
            continue
        filepath_url = []
        for url in pair:
            idx += 1
            # creates unique enumerated filenames
            filename = "%02d-%s" % (idx, url.split('/')[-1])
            filepath = os.path.join(download_base_directory, filename)
            mkdir_p_filepath(filepath)
            if not os.path.exists(filepath):
                logger.debug("downloading image %s to %s ..." % (url, download_base_directory))
                urlretrieve(url, filepath)
                logger.debug("downloading done")
            filepath_url.append((filepath, url))
        if len(filepath_url) < 2:
            # A lone URL has nothing to be compared with.
            logger.warning("skipping group with a single url: %s" % (pair[0]))
            continue

        logger.debug("*" * 20)
        logger.debug("compare images start")
        (image_filepath1, url1), (image_filepath2, url2) = filepath_url[:2]
        logger.debug("image1: %s (%s)" % (get_filename(image_filepath1), url1))
        logger.debug("image2: %s (%s)" % (get_filename(image_filepath2), url2))
        for compare in comparisons:
            t1 = time.time()
            similarity = compare(image_filepath1, image_filepath2)
            duration = "%0.1f" % ((time.time() - t1) * 1000)
            logger.debug("%s => %s took %s ms" % (compare.__name__, similarity, duration))
        logger.debug("compare images finished")


def image_similarity_bands_via_numpy(filepath1, filepath2):
    """Sum of absolute per-pixel differences over all bands of the thumbnails.

    Returns:
        0 for identical thumbnails, larger values for bigger differences, and
        -1 when the thumbnails differ in size or band layout.
    """
    import numpy
    # create thumbnails - resize em
    image1 = get_thumbnail(Image.open(filepath1))
    image2 = get_thumbnail(Image.open(filepath2))
    # this eliminates unequal images - though not so smart....
    if image1.size != image2.size or image1.getbands() != image2.getbands():
        return -1
    # Whole-array conversion also handles single-band images; a signed wide
    # dtype keeps the subtraction from wrapping around.
    pixels1 = numpy.asarray(image1).astype(numpy.int64)
    pixels2 = numpy.asarray(image2).astype(numpy.int64)
    return numpy.sum(numpy.abs(pixels1 - pixels2))


def image_similarity_histogram_via_pil(filepath1, filepath2):
    """Root-mean-square difference of the two thumbnail histograms.

    Returns:
        0.0 for identical histograms, larger values for bigger differences,
        and -1 when the histograms have different lengths.
    """
    h1 = get_thumbnail(Image.open(filepath1)).histogram()
    h2 = get_thumbnail(Image.open(filepath2)).histogram()
    if len(h1) != len(h2):
        return -1
    squared_error = sum((a - b) ** 2 for a, b in zip(h1, h2))
    # float() keeps the mean from being truncated by Python 2 integer division.
    return math.sqrt(squared_error / float(len(h1)))


def image_similarity_vectors_via_numpy(filepath1, filepath2):
    """Dot product of the normalised per-pixel average vectors.

    Both images are stretched to the same thumbnail size first.
    source: http://www.syntacticbayleaves.com/2008/12/03/determining-image-similarity/

    Returns:
        1.0 for identical thumbnails, smaller values for bigger differences.
    """
    from numpy import average, linalg, dot
    image1 = get_thumbnail(Image.open(filepath1), stretch_to_fit=True)
    image2 = get_thumbnail(Image.open(filepath2), stretch_to_fit=True)
    vectors = []
    norms = []
    for image in (image1, image2):
        vector = [average(pixel_tuple) for pixel_tuple in image.getdata()]
        vectors.append(vector)
        norms.append(linalg.norm(vector, 2))
    a, b = vectors
    a_norm, b_norm = norms
    # dot() raises "ValueError: matrices are not aligned" for vectors of
    # different length.
    return dot(a / a_norm, b / b_norm)


def image_similarity_greyscale_hash_code(filepath1, filepath2):
    """Hamming distance between the greyscale average-hashes of both images.

    source: http://blog.safariflow.com/2013/11/26/image-hashing-with-python/
    """
    image1 = get_thumbnail(Image.open(filepath1), greyscale=True)
    image2 = get_thumbnail(Image.open(filepath2), greyscale=True)
    code1 = image_pixel_hash_code(image1)
    code2 = image_pixel_hash_code(image2)
    # use hamming distance to compare hashes
    return hamming_distance(code1, code2)


def image_pixel_hash_code(image):
    """Return an upper-case hex string with one bit per pixel.

    A bit is 1 when the pixel value is below the average pixel value.
    """
    pixels = list(image.getdata())
    avg = sum(pixels) / len(pixels)
    bits = "".join('1' if pixel < avg else '0' for pixel in pixels)  # '00010100...'
    return "%016X" % int(bits, 2)


def hamming_distance(s1, s2):
    """Count the positions at which the two sequences differ.

    Hamming distance is only defined for equal lengths, so the longer
    sequence is chopped: zip() stops at the end of the shorter one.
    """
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))


def get_thumbnail(image, size=(128,128), stretch_to_fit=False, greyscale=False):
    """Get a smaller version of the image - makes comparison much faster/easier.

    Args:
        image: an opened PIL image.
        size: target (width, height).
        stretch_to_fit: when False the image is shrunk in place keeping its
            aspect ratio; when True it is resized to exactly ``size``.
        greyscale: convert the result to mode "L".
    """
    if not stretch_to_fit:
        image.thumbnail(size, _RESAMPLE_FILTER)
    else:
        image = image.resize(size)  # for faster computation
    if greyscale:
        image = image.convert("L")  # Convert it to grayscale.
    return image


def mkdir_p_filepath(path):
    """Create the directory that is to contain the file ``path``."""
    dirpath = os.path.dirname(os.path.abspath(path))
    mkdir_p(dirpath)


def mkdir_p(path):
    """Create ``path`` including parents; an existing directory is fine."""
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise


def get_filename(path):
    """Cross platform filename from a given path.

    source: http://stackoverflow.com/questions/8384737/python-extract-file-name-from-path-no-matter-what-the-os-path-format
    """
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)


if __name__ == "__main__":
    main()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
a@t400:~/lab/image-similarity$ python image_similarity.py 2013-12-22 20:28:19,507 - image_similarity.py:32 - image file location: /tmp/imagesimilarity/ 2013-12-22 20:28:19,507 - image_similarity.py:55 - ******************** 2013-12-22 20:28:19,507 - image_similarity.py:56 - compare images start 2013-12-22 20:28:19,509 - image_similarity.py:59 - image1: 01-elephant.png (http://www.linuxfestnorthwest.org/sites/default/files/sponsors/elephant.png) 2013-12-22 20:28:19,509 - image_similarity.py:62 - image2: 02-elephant.png (http://www.linuxfestnorthwest.org/sites/default/files/sponsors/elephant.png) 2013-12-22 20:28:19,958 - image_similarity.py:69 - image_similarity_bands_via_numpy => 0 took 448.1 ms 2013-12-22 20:28:20,242 - image_similarity.py:76 - image_similarity_histogram_via_pil => 0.0 took 284.0 ms 2013-12-22 20:28:21,379 - image_similarity.py:83 - image_similarity_vectors_via_numpy => 1.0 took 1136.5 ms 2013-12-22 20:28:21,676 - image_similarity.py:90 - image_similarity_greyscale_hash_code => 0 took 296.9 ms 2013-12-22 20:28:21,676 - image_similarity.py:93 - compare images finished 2013-12-22 20:28:21,676 - image_similarity.py:55 - ******************** 2013-12-22 20:28:21,676 - image_similarity.py:56 - compare images start 2013-12-22 20:28:21,677 - image_similarity.py:59 - image1: 03-elephant.png (http://www.linuxfestnorthwest.org/sites/default/files/sponsors/elephant.png) 2013-12-22 20:28:21,677 - image_similarity.py:62 - image2: 04-220px-Postgresql_elephant.svg_.png (http://terminaltwister.com/wp-content/uploads/2013/09/220px-Postgresql_elephant.svg_.png) 2013-12-22 20:28:21,831 - image_similarity.py:69 - image_similarity_bands_via_numpy => -1 took 153.8 ms 2013-12-22 20:28:21,985 - image_similarity.py:76 - image_similarity_histogram_via_pil => 509.38197848 took 154.5 ms 2013-12-22 20:28:23,094 - image_similarity.py:83 - image_similarity_vectors_via_numpy => 0.809369331383 took 1108.0 ms 2013-12-22 20:28:23,260 - image_similarity.py:90 - 
image_similarity_greyscale_hash_code => 3207 took 166.5 ms 2013-12-22 20:28:23,261 - image_similarity.py:93 - compare images finished |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 |
# -*- coding: utf-8 -*-
from __future__ import print_function
import datetime
import functools
import inspect
import sys
import time
import traceback
from pprint import pformat
from time import strftime

try:  # Python 2
    import cPickle
except ImportError:  # Python 3: pickle offers the same dumps() call
    import pickle as cPickle

# Decorator Resources
# http://www.ellipsix.net/blog/2010/8/more-python-voodoo-optional-argument-decorators.html
# http://stackoverflow.com/questions/3931627/how-to-build-a-python-decorator-with-optional-parameters/3931903#3931903
# http://typeandflow.blogspot.com/2011/06/python-decorator-with-optional-keyword.html
# http://pko.ch/2008/08/22/memoization-in-python-easier-than-what-it-should-be/
# http://wiki.python.org/moin/PythonDecoratorLibrary#Memoize
# http://wiki.python.org/moin/PythonDecoratorLibrary#Asynchronous_Call
# http://code.activestate.com/recipes/496879-memoize-decorator-function-with-cache-size-limit/


def main():
    """Demo entry point.

    NOTE(review): the unconditional return leaves the demo calls below
    unreachable; this looks like a deliberately disabled demo -- confirm.
    """
    if True:
        pass
    return
    bb = "ten"
    aa = "wow"
    bigboy(bb, aa)


def bigboy(tima, boo):
    """Demo: exercise the exception printer, trace() and the cached meo()."""
    y = "gee man"
    fooo(y)
    trace(msg="great")
    print(datetime.datetime.now())
    for i in range(0, 5):
        time.sleep(1)
        print(meo())


def fooo(x):
    """Demo: provoke a ZeroDivisionError and report it with print_ex()."""
    #trace_inspect()
    try:
        print(1/0)
    except ZeroDivisionError as e:
        #print_exc_plus()
        print_ex()
        #frames = inspect.trace()
        #argvalues = inspect.getargvalues(frames[0][0])
        #print("Argvalues: ", inspect.formatargvalues(*argvalues))


def timing_old_old(f):
    """Legacy timing decorator, kept for reference.

    NOTE(review): it calls ``env``, which is not defined in this file, and
    ``time.clock``, which newer Python 3 releases no longer provide.
    """
    def wrapper(*arg):
        t = time.clock()
        try:
            return f(*arg)
        finally:
            print(str(env('PATH_INFO'))+"\t"+str(f.__name__)+"\t"+str((time.clock()-t)*1000))
    return wrapper


def decoratorFunctionWithArguments(arg1=None):
    """Example decorator factory that prints its argument and the result."""
    def wrap(f):
        print ("Inside wrap(arg1:"+str(arg1)+")")
        def inner_wrap(*args):
            print ("Inside wrapped_f()")
            print("Decorator arguments:"+str(arg1))
            res = f(*args)
            print (res)
            return res
        return inner_wrap
    return wrap


def print_exc_plus():
    """
    Print the usual traceback information, followed by a listing of all the
    local variables in each frame.
    """
    tb = sys.exc_info()[2]
    stack = []
    while tb:
        f = tb.tb_frame
        while f:
            stack.append(f)
            f = f.f_back
        tb = tb.tb_next
    traceback.print_exc()
    print ("Locals by frame, innermost last")
    for frame in stack:
        if frame.f_code.co_name == "<module>":  # so it does not dump globals
            continue
        print("Frame %s in %s at line %s" % (frame.f_code.co_name,
                                             frame.f_code.co_filename,
                                             frame.f_lineno))
        for key, value in frame.f_locals.items():
            strx = "\t%20s = " % key
            # We have to be careful not to cause a new error in our error
            # printer! Calling str() on an unknown object could cause an
            # error we don't want.
            try:
                strx += str(value)
            except Exception:
                strx += "<ERROR WHILE PRINTING VALUE>, "
            print(strx)


#def print_ex_local():
#    import tracebackturbo as traceback
#    print(traceback.format_exc(with_vars=True))


def _format_stack_lines(stacks):
    """Turn records from inspect.stack() into 'file:line - func(args)' lines.

    The line number shown for a frame is the line of its caller, i.e. the
    place the function was called from ("?" for the outermost frame).
    """
    lines = []
    for i, record in enumerate(stacks):
        frame, filename, function_name = record[0], record[1], record[3]
        linenumber = stacks[i+1][2] if i < len(stacks)-1 else "?"
        params = inspect.getargvalues(frame)
        lines.append(filename+":"+str(linenumber)+" - "+str(function_name)
                     + str(inspect.formatargvalues(*params)))
    return lines


def print_ex():
    """Print the exception being handled plus the call stack with arguments.

    Does nothing when no exception is being handled.
    """
    cla, exc, exc_traceback = sys.exc_info()
    if cla is None:
        return
    # ``args`` lives on the exception type, not in the instance __dict__.
    exc_args = getattr(exc, "args", None) or "<no args>"
    ex_title = cla.__name__+"exc:"+str(exc)+" - args:"+str(exc_args)
    except_location = ""
    for filename, lineno, fn, exec_line in traceback.extract_tb(exc_traceback):
        # ('gdecorators.py', 60, 'newfoo', 'print(1/0)')
        except_location += filename+":"+str(lineno)+" - "+str(fn)+" - "+str(exec_line)
    msgs = [ex_title] + _format_stack_lines(inspect.stack())
    msgs.insert(2, except_location)
    timestamp = strftime("%Y-%m-%d %H:%M:%S")
    print(timestamp + " - " + ("\n".join(msgs)))


def print_except_alternative(maxTBlevel=5):
    """Return (name, args, formatted traceback) of the handled exception."""
    cla, exc, trbk = sys.exc_info()
    excName = cla.__name__
    excArgs = getattr(exc, "args", None) or "<no args>"
    excTb = traceback.format_tb(trbk, maxTBlevel)
    return (excName, excArgs, excTb)


# Tips:
# http://stackoverflow.com/questions/6061744/how-to-get-value-of-arguments-in-functions-at-stack
# http://stackoverflow.com/questions/1650713/get-last-functions-call-arguments-from-traceback
# http://bip.weizmann.ac.il/course/python/PyMOTW/PyMOTW/docs/traceback/index.html


def trace_inspect():
    """Print the current call stack, one line per frame, with arguments."""
    msgs = _format_stack_lines(inspect.stack())
    timestamp = strftime("%Y-%m-%d %H:%M:%S")
    print(timestamp + " - " + ("\n".join(msgs)))


def trace(msg=None, params=True):
    """Print a one-line trace message for the calling function."""
    show_stack(inspect.stack()[1], msg, params)


def show_stack(stack, msg="", params=True):
    """Print one record of inspect.stack() with timestamp, place and message.

    Args:
        stack: a single record as returned by inspect.stack().
        msg: optional text appended after the line number.
        params: also print the argument values of that frame.
    """
    frame, filename, linenumber, function_name = stack[0], stack[1], stack[2], stack[3]
    parameters = inspect.getargvalues(frame)
    msg = " - "+str(msg) if msg else ""
    strx = strftime("%Y-%m-%d %H:%M:%S")+" - "
    strx += filename+":"+str(linenumber)+msg+" - "+str(function_name)
    if params:
        strx += str(inspect.formatargvalues(*parameters))
    print(strx)


def nprint(element):
    """Return the pretty-printed form of ``element`` followed by a newline."""
    return pformat(element) + "\n"


def _timing_line(name, args, kw, function_result, t1, t2, result, params, start):
    """Assemble the log line printed by the timing decorator."""
    strx = strftime("%Y-%m-%d %H:%M:%S")
    strx += ' - % 8.f ms - ' % ((t2-t1)*1000.0)
    if params:
        # Only non-empty positional / keyword arguments are shown.
        shown = [repr(str(part)) for part in (args, kw) if part]
        strx += "%s(%s)" % (name, ", ".join(shown))
    else:
        strx += "%s()" % name
    if result:
        strx += " => %s" % str(nprint(function_result))
    if start:
        d = datetime.datetime.fromtimestamp(t1)
        strx += " start:"+d.strftime("%Y-%m-%d %H:%M:%S")
    return strx


# http://www.siafoo.net/article/68
# being nice to subsequent decorator calls or chains, otherwise losing the name of the function
def timing(result=True, params=True, start=False):
    """Decorator factory that prints how long each call took.

    Args:
        result: include the (pretty-printed) return value in the line.
        params: include the call arguments in the line.
        start: append the wall-clock start time of the call.

    The line is printed even when the wrapped function raises; the result
    is then shown as None.
    """
    def timing_decorator(f):
        @functools.wraps(f)
        def wrapper(*args, **kw):
            t1 = time.time()
            function_result = None
            try:
                function_result = f(*args, **kw)
                return function_result
            finally:
                t2 = time.time()
                print(_timing_line(f.__name__, args, kw, function_result,
                                   t1, t2, result, params, start))
        return wrapper
    return timing_decorator


def memory(ttl=24*60*60*1000):  # default 1 day
    """Decorator factory caching results per argument set for ``ttl`` ms.

    Arguments that cannot be pickled make the call bypass the cache.
    """
    def cache_decorator(f):
        cache = {}

        @functools.wraps(f)
        def wrapper(*args, **kw):
            try:
                key = cPickle.dumps((args, kw))
            except Exception:
                # uncachable arguments.
                # Better to not cache than to blow up entirely.
                return f(*args, **kw)
            entry = cache.get(key)
            if entry is not None:
                age_ms = (time.time() - entry['created'])*1000.0
                if age_ms < ttl:
                    return entry['result']
            # Errors raised by f itself propagate; f is never called twice.
            result = f(*args, **kw)
            cache[key] = {'result': result, 'created': time.time()}
            return result
        return wrapper
    return cache_decorator


def memoize_limited(function, limit=None):
    """Memoize ``function``, keeping at most ``limit`` results.

    When the limit is exceeded the least recently used entry is dropped.
    ``limit=None`` means unlimited.

    Returns:
        The memoizing wrapper around ``function``.
    """
    results = {}
    order = []  # keys, least recently used first

    @functools.wraps(function)
    def memoize_wrapper(*args, **kwargs):
        key = cPickle.dumps((args, kwargs))
        try:
            # Known key: move it to the most-recently-used end.
            order.append(order.pop(order.index(key)))
        except ValueError:
            value = function(*args, **kwargs)
            results[key] = value
            order.append(key)
            if limit is not None and len(order) > limit:
                del results[order.pop(0)]
            return value
        return results[key]
    return memoize_wrapper


def propget(func):
    """Use ``func`` as getter of the property of the same name.

    An existing property of that name in the calling scope keeps its
    setter and deleter.
    """
    caller_locals = sys._getframe(1).f_locals
    prop = caller_locals.get(func.__name__)
    if not isinstance(prop, property):
        prop = property(func, doc=func.__doc__)
    else:
        doc = prop.__doc__ or func.__doc__
        prop = property(func, prop.fset, prop.fdel, doc)
    return prop


def propset(func):
    """Use ``func`` as setter of the property of the same name.

    An existing property of that name in the calling scope keeps its
    getter and deleter.
    """
    caller_locals = sys._getframe(1).f_locals
    prop = caller_locals.get(func.__name__)
    if not isinstance(prop, property):
        prop = property(None, func, doc=func.__doc__)
    else:
        doc = prop.__doc__ or func.__doc__
        prop = property(prop.fget, func, prop.fdel, doc)
    return prop


def propdel(func):
    """Use ``func`` as deleter of the property of the same name.

    An existing property of that name in the calling scope keeps its
    getter, setter and docstring.
    """
    caller_locals = sys._getframe(1).f_locals
    prop = caller_locals.get(func.__name__)
    if not isinstance(prop, property):
        prop = property(None, None, func, doc=func.__doc__)
    else:
        prop = property(prop.fget, prop.fset, func, prop.__doc__)
    return prop


def suppress_errors(func=None, log_func=None):
    """Automatically silence any errors that occur within a function

    Usable both as ``@suppress_errors`` and ``@suppress_errors(log_func=...)``.
    A silenced call returns None; ``log_func`` receives ``str(error)``.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                if log_func is not None:
                    log_func(str(e))
        return wrapper
    if func is None:
        return decorator
    else:
        return decorator(func)


@memory(3*1000)
@timing()
def meo():
    """Demo function: slow on the first call, cached for 3 seconds after."""
    time.sleep(1)
    print("hi jimmy")
    return "done meo"


if __name__ == "__main__":
    main()
1 2 3 4 5 6 7 8 |
import os


def dirsize(start_dirpath):
    """Return the total size in bytes of all files below ``start_dirpath``.

    Args:
        start_dirpath: directory to walk recursively.

    Returns:
        Sum of ``os.path.getsize`` over every file found. Entries whose size
        cannot be read (broken symlink, file removed during the walk) are
        left out instead of aborting the whole computation.
    """
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_dirpath):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            try:
                total_size += os.path.getsize(filepath)
            except OSError:
                # Unreadable entry: skip it, keep counting the rest.
                continue
    return total_size
1 2 3 4 5 6 |
def hbytes(num):
    """Format a byte count with one decimal and a unit from bytes up to TB."""
    units = ('bytes', 'KB', 'MB', 'GB')
    value = num
    position = 0
    while position < len(units):
        if value < 1024.0:
            return "%3.1f%s" % (value, units[position])
        # Not small enough for this unit: scale down and try the next one.
        value = value / 1024.0
        position += 1
    # Anything beyond GB is reported in TB.
    return "%3.1f%s" % (value, 'TB')
1 2 3 4 |
def yield_file(filepath):
    """Yield the lines of a text file one at a time.

    Args:
        filepath: path of the file to read.

    Yields:
        Each line including its trailing newline. The file is closed once
        the generator is exhausted or closed.
    """
    with open(filepath, 'r') as f:
        for line in f:
            yield line
1 2 |
# Usage example: handle the file one line at a time.
for line in yield_file(filepath):
    do_something(line) # whatever you want :)
1 2 3 4 |
# Use b unless it is None, otherwise fall back to c.
# Identity comparison is the reliable None test: "!=" can be overridden by
# the class of b.
if b is not None:
    a = b
else:
    a = c
1 |
# Short form: a becomes c whenever b is falsy (None, 0, '', [], ...),
# not only when b is None.
a = b or c
1 2 3 4 5 6 |
# python files, text files and java files
# One --include glob per extension: "*[py|txt|java]" is a character class
# and matches any name ending in one of those characters. [[:space:]] is
# used because "\s" is not a whitespace escape inside a bracket expression.
grep --color --include="*.py" --include="*.txt" --include="*.java" \
    -rHnE "^import [a-z,[:space:]]+$" *

# same as above with extended
# The dot before the extension is escaped and the alternatives are grouped.
find . -regextype posix-extended -iregex ".*\.(py|txt|java)" \
    -exec grep --color -HnE "import|except" '{}' \; -print

#find . -name "*.py" -exec grep --color -rHnE "import|except" '{}' \; -print
1 2 3 |
# Dump DB_NAME as plain SQL with INSERT statements into a timestamped file.
# --verbose is given once only; repeating it can raise the log level further.
pg_dump -p 5432 -U postgres --verbose --inserts \
    --format=plain --encoding=utf8 \
    --file "DB_NAME.$(date +"%Y%m%d_%H%M").sql" DB_NAME
1 2 3 |
# Dump the "postgres" database as plain SQL with INSERT statements into a
# timestamped file.
# --verbose is given once only; repeating it can raise the log level further.
pg_dump -p 5432 -U postgres --verbose --inserts \
    --format=plain --encoding=utf8 \
    --file "postgres.$(date +"%Y%m%d_%H%M").sql" postgres
1 |
# Dump the database DBNAME from the MySQL server HOSTNAME into dbdump.sql.
# HOSTNAME, USERNAME, PASSWORD and DBNAME are presumably placeholders.
# NOTE(review): a password given on the command line may be visible to other
# users in the process list -- consider a prompt or an option file.
mysqldump -h HOSTNAME -u USERNAME --password=PASSWORD DBNAME > dbdump.sql
1 |
# Compress the dump; bzip2 writes dbdump.sql.bz2 and by default removes the
# uncompressed input file.
bzip2 dbdump.sql
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# -*- coding: UTF-8 -*-
import os, sys


def main():
    """Demo: print the output of ``ls -l``."""
    res = sh('ls -l')
    print(res)


def sh(cmd_arg_str, errout=sys.stderr):
    r""" Popen a shell -> line or "line1 \n line2 ...", trim last \n

    Args:
        cmd_arg_str: command line, run through the shell. It must come from
            a trusted source: it is executed as written.
        errout: file-like object the command's stderr output is copied to;
            pass None to discard it.

    Returns:
        The command's stdout as text, without its final newline.
    """
    import subprocess
    # crashes after pyqt QApplication() with mac py 2.5.1, pyqt 4.4.2
    # subprocess.py _communicate select.error: (4, 'Interrupted system call')
    # see http://bugs.python.org/issue1068268 subprocess is not EINTR-safe
    # QProcess instead of Popen works
    process = subprocess.Popen(
        cmd_arg_str,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
        universal_newlines=True,  # text output on Python 2 and 3 alike
    )
    lines, err = process.communicate()  # wait til process ends
    if errout and err:
        errout.write(err)
        if not err.endswith("\n"):
            errout.write("\n")
    # trim the last \n so sh( "ls xx" ) -> "xx" not "xx\n"
    # and split( "\n" ) -> no extra ""
    return lines[:-1] if lines.endswith("\n") else lines


if __name__ == '__main__':
    main()
1 2 3 4 5 6 7 8 |
CREATE OR REPLACE FUNCTION util.hamming_distance (s1 text, s2 text)
RETURNS integer
/*
  Number of positions at which s1 and s2 differ.

  Only the first min(length(s1), length(s2)) characters are compared,
  because zip() stops at the end of the shorter input.
  STRICT: the result is NULL when either argument is NULL, instead of an
  error raised inside the Python body.
  IMMUTABLE: the result depends on the arguments only.

  select * from util.hamming_distance ('hella3', 'hillo2')  -- 3
*/
AS $$
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))
$$ LANGUAGE plpythonu
IMMUTABLE
STRICT;
1 2 3 4 5 6 7 8 9 |
def hamming_distance(s1, s2):
    """Count the positions at which two equally long sequences differ."""
    assert len(s1) == len(s2)
    return sum(1 for left, right in zip(s1, s2) if left != right)


def test_hamming_distance():
    """Print the distance of two sample strings and their paired characters."""
    first, second = "0102304", "9375304"
    distance = hamming_distance(first, second)
    print(distance)
    print((first, list(zip(first, second))))