Differences

This shows you the differences between two versions of the page.

software_engineering:languages [2012/07/06]
emilmont
software_engineering:languages [2013/01/08] (current)
emilmont
Line 2: Line 2:
===== Open Source Activity ===== ===== Open Source Activity =====
The 9 languages with the most users in open source according to <html><a href ="http://www.ohloh.net"><img src="http://www.ohloh.net/images/badges/mini.gif" width="80" height="15" /></a></html>: The 9 languages with the most users in open source according to <html><a href ="http://www.ohloh.net"><img src="http://www.ohloh.net/images/badges/mini.gif" width="80" height="15" /></a></html>:
 +  - JavaScript
  - C   - C
  - Java   - Java
-  - JavaScript 
  - C++   - C++
  - Python   - Python
  - PHP   - PHP
 +  - C#
 +  - Ruby
  - Perl   - Perl
-  - Ruby 
-  - C# 
-GitHub repository: https://github.com/emilmont/LangPop +GitHub repository: 
- +  * [[https://github.com/emilmont/Experiments/blob/master/langpop.py|Language Popularity]] 
-<code python> +  * [[https://github.com/emilmont/Algorithms/blob/master/algorithms/clusters/hierarchical.py|Hierarchical Clustering]]
-from numpy import array, mean +
-from numpy.linalg import norm +
- +
-class Item: +
-    def __init__(self, name, values): +
-        self.name = name +
-        self.values = array(values) +
-     +
-    def distance(self, other): +
-        return norm(self.values - other.values) +
-     +
-    def __cmp__(self, other): +
-        return cmp(norm(self.values), norm(other.values)) +
- +
-class Group(Item): +
-    def __init__(self, items): +
-        self.items = items +
-        self.values = mean(array([i.values for i in self.items]), axis=0) +
-     +
-    def __str__(self): +
-        self.items.sort(reverse=True) +
-        return "[%s]" % ', '.join([i.name for i in self.items]) +
- +
-def pop_items(l, indexes): +
-    indexes.sort(reverse=True) +
-    return [l.pop(index) for index in indexes] +
- +
-def find_clusters(items, cluster_num): +
-    # Initially create a group for each item +
-    groups = [Group([item]) for item in items] +
-    cache_dist = {} +
-     +
-    # Iterate until the number of groups match the desired number of clusters +
-    while len(groups) > cluster_num: +
-         +
-        # Find the closest pair of groups +
-        closest_pair, shortest_distance = None, None +
-        for a in range(len(groups)): +
-            for b in range(a+1, len(groups)): +
-                 +
-                # Calculate distance +
-                dist_id = (id(groups[a]), id(groups[b])) +
-                if dist_id not in cache_dist: +
-                    cache_dist[dist_id] = groups[a].distance(groups[b]) +
-                distance = cache_dist[dist_id] +
-                 +
-                # Keep shortest distance +
-                if shortest_distance is None or distance < shortest_distance: +
-                    shortest_distance = distance +
-                    closest_pair = [a, b] +
-         +
-        # Merge the closest pair of groups +
-        a, b = pop_items(groups, closest_pair) +
-        groups.append(Group(a.items + b.items)) +
-     +
-    return groups +
-</code> +
- +
-<code python> +
-#!/usr/bin/python +
-from urllib import urlencode, urlopen +
-from os.path import exists, join +
-from os import makedirs +
-from hashlib import md5 +
-from collections import defaultdict +
-import xml.etree.ElementTree as et +
-from datetime import date +
-from argparse import ArgumentParser +
- +
-from cluster import Item, find_clusters +
- +
-NOT_GENERAL_PROGRAMMING = [ +
-    'HTML', 'CSS', 'Haml', 'ClearSilver', +
-    'XML', 'XSL Transformation', 'XML Schema', 'MXML', 'XAML', 'QML', +
-    'shell script', 'DOS batch script', 'AWK', 'Vim Script', 'DCL', 'NSIS', +
-    'Make', 'Automake', 'Autoconf', 'Ebuild', 'CMake', 'Exheres', 'Jam', +
-    'TeX/LaTeX', 'MetaFont', 'MetaPost', +
-    'SQL', 'IDL/PV-WAVE/GDL', +
-    'Assembly', 'OpenGL Shading', 'CUDA', +
-    'Matlab', 'Octave', 'R', 'Scilab', +
-    'Stratego', 'Puppet', 'VHDL', +
-] +
-ALIAS = { +
-    'C/C++':'C++', +
-    # Lisp family +
-    'Emacs Lisp': 'Lisp', 'Scheme': 'Lisp', 'Racket': 'Lisp', 'Clojure': 'Lisp', +
-    # Fortran Family +
-    'Fortran (Fixed-format)': 'Fortran', 'Fortran (Free-format)': 'Fortran', +
-    # Basic Family +
-    'Visual Basic': 'Basic', 'Structured Basic': 'Basic', 'Classic Basic': 'Basic', +
-    # Modula Family +
-    'Modula-2': 'Modula', 'Modula-3': 'Modula', 'Oberon': 'Modula' +
-} +
-THRESHOLD = 0.45 +
-  +
-def get_top_languages(key, sort): +
-    languages = defaultdict(int) +
-    params = { +
-        'api_key': key, +
-        'sort'   : sort, +
-        'page'   : 1 +
-    } +
-    while True: +
-        query = sorted(params.items()) +
-        url = "http://www.ohloh.net/languages.xml?%s" % urlencode(query) +
-        date_dir = date.today().strftime("%y_%m_%d") +
-        filename = md5(url).hexdigest() + '.xml' +
-        cache_dir = join('data', date_dir) +
-        if not exists(cache_dir): +
-            makedirs(cache_dir) +
-        cache_file = join(cache_dir, filename) +
-        if exists(cache_file): +
-            print 'loading:', cache_file +
-            xml = open(cache_file).read() +
-        else: +
-            print 'request:', url +
-            xml = urlopen(url).read() +
-            open(cache_file, 'w').write(xml) +
-  +
-        root = et.fromstring(xml) +
-        error = root.find("error") +
-        if error != None: +
-            raise Exception('Ohloh Error:', et.tostring(error)) +
-  +
-        if root.find("items_returned").text == "0": +
-            break +
-  +
-        for lang_node in root.findall("result/language"): +
-            name = lang_node.find('nice_name').text +
-            if name in NOT_GENERAL_PROGRAMMING: +
-                continue +
-            if name in ALIAS: +
-                name = ALIAS[name] +
-            value = int(lang_node.find(sort).text) +
-            languages[name] += value +
-  +
-        params['page'] += 1 +
-  +
-    return sorted([(c, l) for l, c in languages.iteritems()], reverse=True) +
-  +
-  +
-if __name__ == '__main__': +
-    parser = ArgumentParser(description='Download the Ohloh languages statistics') +
-    parser.add_argument('-k', '--key', help='The Ohloh API key', required=True) +
-    parser.add_argument('-s', '--sort', help='The sorting field', +
-            choices=['contributors', 'commits', 'code'], default='contributors') +
-    args = parser.parse_args() +
-     +
-    languages = get_top_languages(args.key, args.sort) +
-    unit = 100.0 / float(languages[0][0]) +
-     +
-    items = [] +
-    for i, (value, lang) in enumerate(languages): +
-        n = value * unit +
-        if n < THRESHOLD: break +
-        print '%2d) %.2f - %s' % (i+1, n, lang) +
-        items.append(Item(lang, (n,))) +
-     +
-    # Find Popularity Clusters +
-    cluster_names = ('Ubiquitous', 'Very Popular', 'Popular', 'Niche') +
-    clusters = find_clusters(items, len(cluster_names)) +
-    clusters.sort(reverse=True) +
-     +
-    print "\nPopularity Clusters:" +
-    for i, label in enumerate(cluster_names): +
-        print "%s: %s" % (label, clusters[i]) +
-</code>+
<code> <code>
- 1) 100.00 - C + 1) 100.00 - JavaScript 
- 2) 99.73 - Java + 2) 99.99 - C 
- 3) 98.37 - JavaScript + 3) 99.83 - Java 
- 4) 85.98 - C++ + 4) 86.29 - C++ 
- 5) 66.60 - Python + 5) 69.39 - Python 
- 6) 52.69 - PHP + 6) 52.13 - PHP 
- 7) 35.45 - Perl + 7) 36.83 - C# 
- 8) 34.42 - Ruby + 8) 34.84 - Ruby 
- 9) 29.90 - C# + 9) 34.39 - Perl 
-10) 11.49 - Objective-C +10) 12.41 - Objective-C 
-11) 11.06 - Lisp +11) 11.01 - Lisp 
-12) 9.99 - Modula +12) 9.03 - Modula 
-13) 8.34 - ActionScript +13) 7.98 - ActionScript 
-14) 5.68 - Basic +14) 6.12 - Basic 
-15) 5.39 - Lua +15) 5.31 - Lua 
-16) 3.85 - Pascal+16) 3.99 - Pascal
17) 3.72 - D 17) 3.72 - D
-18) 3.30 - Groovy +18) 3.45 - Groovy 
-19) 3.23 - Fortran +19) 3.25 - Fortran 
-20) 3.20 - Tcl +20) 3.09 - Tcl 
-21) 2.86 - Haskell +21) 2.84 - Haskell 
-22) 2.26 - Scala +22) 2.44 - Scala 
-23) 1.69 - Erlang +23) 2.23 - CoffeeScript 
-24) 1.53 - Objective Caml +24) 1.72 - Erlang 
-25) 1.08 - CoffeeScript +25) 1.54 - Objective Caml 
-26) 0.84 - Ada +26) 0.81 - Ada 
-27) 0.66 - Vala +27) 0.75 - Go 
-28) 0.63 - Go +28) 0.74 - F# 
-29) 0.55 - F# +29) 0.68 - Vala 
-30) 0.49 - Eiffel+30) 0.46 - Eiffel
31) 0.45 - HaXe 31) 0.45 - HaXe
Popularity Clusters: Popularity Clusters:
-Ubiquitous: [C, Java, JavaScript, C++] +Ubiquitous: [JavaScript, C, Java, C++] 
-Very Popular: [Python, PHP] +Very Popular: [Python] 
-Popular: [Perl, Ruby, C#] +Popular: [PHP, C#, Ruby, Perl] 
-Niche: [Objective-C, Lisp, Modula, ActionScript, Basic, Lua, Pascal, D, Groovy, Fortran, Tcl, Haskell, Scala, Erlang, Objective Caml, CoffeeScript, Ada, Vala, Go, F#, Eiffel, HaXe]+Niche: [Objective-C, Lisp, Modula, ActionScript, Basic, Lua, Pascal, D, Groovy, Fortran, Tcl, Haskell, Scala, CoffeeScript, Erlang, Objective Caml, Ada, Go, F#, Vala, Eiffel, HaXe]
</code> </code>
- 
-Black Duck software provides a wider analysis: [[http://www.blackducksoftware.com/oss/projects|Open Source Project Data]] 
==== Language Analysis ==== ==== Language Analysis ====
Line 236: Line 67:
==== Packages ==== ==== Packages ====
-  * Python: [[http://pypi.python.org/pypi|22169]] +  * Ruby: [[https://rubygems.org/stats|41,284]] 
-  * Perl: [[http://www.cpan.org/modules/01modules.index.html|22115]] +  * Python: [[http://pypi.python.org/pypi|22,169]] 
-  * Haskell: [[http://hackage.haskell.org/packages/archive/pkg-list.html|5500]] +  * Perl: [[http://www.cpan.org/modules/01modules.index.html|22,115]] 
-  * Ruby: [[http://rubygems.org/gems|2576]]+  * Haskell: [[http://hackage.haskell.org/packages/archive/pkg-list.html|5,500]] 
===== Benchmarks ===== ===== Benchmarks =====
software_engineering/languages.1341600112.txt.gz · Last modified: 2012/07/06 by emilmont
CC Attribution-Noncommercial-Share Alike 3.0 Unported
Valid CSS Driven by DokuWiki Recent changes RSS feed Valid XHTML 1.0