#!/usr/bin/perl -w
# This script is Public Domain.
# Original version: Hanno Hecker (ankh-morph.org)
# Changes: Stephan Knauss (osm-tools.org)
#
# Reads OSM XML (files named on the command line, or STDIN) and prints an
# HTML report on STDOUT showing how many nodes/ways/relations were last
# touched by users who agreed to, declined, or have not answered the ODbL
# question. Progress and error messages go to STDERR.
#
# NOTE(review): the copy of this script under review had every "<...>" span
# stripped (HTML tags inside strings and the filehandle reads). The visible
# text of every message is preserved; the HTML tags, page title, charset and
# CSS colours below are a reconstruction and should be checked against the
# original page layout.

use strict;
use warnings;
use Date::Parse;
use POSIX qw(strftime);
use File::stat;
use Time::localtime;
use LWP::Simple;

my $LIST_BASE_URL  = 'http://planet.openstreetmap.org/users_agreed';
my $AGREED_FILE    = 'users_agreed.txt';
my $DISAGREED_FILE = 'users_disagreed.txt';

# The lists are updated every hour on the server, so there is no need to
# re-download them more often than every 30 minutes.
my $MAX_CACHE_AGE = 30 * 60;

# Any uid at or above this value is counted as "agreed" without consulting
# the downloaded lists.
# NOTE(review): presumably accounts created after acceptance became part of
# sign-up -- confirm.
my $AUTO_AGREED_UID = 286582;

my @ELEMENT_KINDS = qw(node way relation);

# Global counters. Everything is initialised to 0 so that a kind that never
# occurs in the input (e.g. an extract without relations) prints as 0 instead
# of triggering undef warnings or a division by zero.
my %stats = map { $_ => 0 } qw(user_all user_odbl user_declined user_unknown);
for my $what (@ELEMENT_KINDS) {
    $stats{"${_}_$what"} = 0 for qw(all ok partial unknown loss);
}

# ---------------------------------------------------------------------------
# Load the acceptance lists
# ---------------------------------------------------------------------------

refresh_user_lists();

my $accepted     = load_uid_list($AGREED_FILE);
my $acceptedDate = file_mtime($AGREED_FILE);
my $declined     = load_uid_list($DISAGREED_FILE);
my $declinedDate = file_mtime($DISAGREED_FILE);

# ---------------------------------------------------------------------------
# Scan the OSM data
# ---------------------------------------------------------------------------

# Per-user record, keyed by user name:
#   uid, odbl (1 agreed / -1 declined / 0 unknown), last (newest timestamp),
#   node/way/relation (elements last edited by the user),
#   sole_node/sole_way/sole_relation (version-1 elements by the user).
my %users;
my $newestEdit = '';

print STDERR "Reading OSM data...\n";

# NOTE(review): the input handle was missing in the reviewed copy; "<>" reads
# the files given on the command line and falls back to STDIN.
while (my $line = <>) {
    next unless $line =~ /<(node|way|relation) /;
    my $what = $1;
    $stats{"all_$what"}++;

    my $user = $line =~ /user="([^"]+)"/ ? $1 : 'UNKNOWN (old anonymous edit)';

    # manually adjust changed user names
    $user = 'Tom Layo' if $user eq 'Sam Kuat';

    my $entry = ($users{$user} ||= {});
    $entry->{$what}++;

    my $uid = $line =~ /uid="(\d+)"/ ? $1 : 0;
    $entry->{uid} = $uid;

    # First occurrence of the user: determine the ODbL status once.
    if (!exists $entry->{odbl}) {
        $stats{user_all}++;

        # Ensure the node counter exists; the detailed table is sorted by it.
        $entry->{node} = 0 unless exists $entry->{node};

        if ($uid >= $AUTO_AGREED_UID || exists $accepted->{$uid}) {
            $stats{user_odbl}++;
            $entry->{odbl} = 1;
        }
        elsif (exists $declined->{$uid}) {
            $stats{user_declined}++;
            $entry->{odbl} = -1;
        }
        else {
            $stats{user_unknown}++;
            $entry->{odbl} = 0;
        }
    }

    my $version = $line =~ /version="(\d+)"/ ? $1 : 0;

    # Version 1 means the user is the only contributor to the element, so
    # the user's ODbL decision is definitely valid for it.
    $entry->{"sole_$what"}++ if $version == 1;

    if ($entry->{odbl} == 1) {
        $stats{ ($version == 1 ? 'ok' : 'partial') . "_$what" }++;
    }
    elsif ($entry->{odbl} == -1) {
        $stats{"loss_$what"}++;
    }
    elsif ($version == 1) {
        # Elements with version > 1 whose last editor has not answered are
        # not counted in any bucket (unchanged from the original logic).
        $stats{"unknown_$what"}++;
    }

    # Timestamps are compared as strings, which orders same-format values
    # chronologically.
    my $ts = $line =~ /timestamp="([^"]+)"/ ? $1 : 0;
    $newestEdit = $ts if $ts && $ts gt $newestEdit;
    if (!exists $entry->{last} || $entry->{last} lt $ts) {
        $entry->{last} = $ts;
    }
}

# ---------------------------------------------------------------------------
# Report
# ---------------------------------------------------------------------------

# NOTE(review): assumes the input (and therefore the user names) is UTF-8.
print <<'END_HTML';
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>ODbL acceptance statistics</title>
<style type="text/css">
tr.is_odbl { background-color: #ccffcc; }
tr.no_odbl { background-color: #ffcccc; }
</style>
</head>
<body>
END_HTML

print "<h2>Global statistics</h2>\n";
print "Statistics based on edits up to: " . format_day($newestEdit) . "<br>\n";
print "Agreed ODbL users up to: "
    . strftime("%Y-%m-%d", gmtime($acceptedDate)) . "<br>\n";
print "Declined ODbL users up to: "
    . strftime("%Y-%m-%d", gmtime($declinedDate)) . "<br>\n<br>\n";

print_element_summary('nodes',     'node');
print_element_summary('ways',      'way');
print_element_summary('relations', 'relation');

my $user_format = join '',
    "Total users in DB: %d<br>\n",
    "- of this already agreed to ODbL: %d (%.2f%%)<br>\n",
    "- of this not yet answered: %d (%.2f%%)<br>\n",
    "- of this actively declined new CTs: %d (%.2f%%)<br>\n",
    "<br>\n";
printf $user_format,
    $stats{user_all},
    map { ($stats{$_}, percent($stats{$_}, $stats{user_all})) }
        qw(user_odbl user_unknown user_declined);

print <<'END_HTML';
<dl>
<dt>safe for ODbL:</dt>
<dd>an element is considered safe in case it exists in version 1 and the user already agreed to ODbL</dd>
<dt>possible loss:</dt>
<dd>an element is possibly lost in case it exists in version 1 and the user has not yet answered</dd>
<dt>possibly ODbL:</dt>
<dd>the element has a version greater 1. The last editor agreed to ODbL. As this statistics does not evaluate the history it could be possible that the element was created/modified by a user not agreed to ODbL. In this case the whole or parts of the element would be lost. In case all previous versions are done by users that agreed to ODbL (or changes are irrelevant because replaced by newer changes) these elements are safe for ODbL. This is the optimistic assumption most users agree to ODbL.</dd>
<dt>definitive loss:</dt>
<dd>the element was created/editied by a user who actively declined ODbL. In case of license switch this object is removed/edits reverted</dd>
</dl>
<dl>
<dt>Last version by user:</dt>
<dd>The last modification of the element was done by the given user.</dd>
<dt>Sole version by user:</dt>
<dd>The element exists in version 1 and was created by the given user.</dd>
</dl>
END_HTML

print "<h2>Detailed statistics</h2>\n";

my @count_columns = ('nodes', 'nodes %', 'ways', 'ways %',
                     'relations', 'relations %');
my @columns = ('Rank', 'User (uid)', 'ODbL',
               (@count_columns) x 2, 'last edit');

print "<table border=\"1\">\n";
print "<tr><th colspan=\"3\"></th>",
      "<th colspan=\"6\">Last version by user (not relevant for ODbL)</th>",
      "<th colspan=\"6\">Sole version by user</th><th></th></tr>\n";
print "<tr>", (map { "<th>$_</th>" } @columns), "</tr>\n";

# One row per user, most nodes first. Every user record has a defined node
# counter (set on first occurrence above), so no filtering is required.
my $rank = 0;
for my $name (sort { $users{$b}{node} <=> $users{$a}{node} } keys %users) {
    my $entry = $users{$name};
    $rank++;

    my $class = $entry->{odbl} == 1  ? 'is_odbl'
              : $entry->{odbl} == -1 ? 'no_odbl'
              :                        '';

    # The user name is printed exactly as captured from the XML attribute.
    print " <tr class=\"$class\">\n";
    print "  <td>$rank</td>\n";
    print "  <td>$name ($entry->{uid})</td>\n";
    print "  <td>$entry->{odbl}</td>\n";
    print "  ", node_perc($entry->{node}),          "\n";
    print "  ", way_perc($entry->{way}),            "\n";
    print "  ", rel_perc($entry->{relation}),       "\n";
    print "  ", node_perc($entry->{sole_node}),     "\n";
    print "  ", way_perc($entry->{sole_way}),       "\n";
    print "  ", rel_perc($entry->{sole_relation}),  "\n";
    print "  <td>", format_day($entry->{last}), "</td>\n";
    print " </tr>\n";
}

# Totals row: the three leading cells line up with Rank / User / ODbL.
print "<tr>\n";
print "<td>Total sums</td>\n";
print "<td>$stats{user_all}</td>\n";
print "<td></td>\n";
for my $what (@ELEMENT_KINDS) {
    my $total = $stats{"all_$what"};
    print "<td>$total</td>\n";
    print "<td>100%</td>\n";
}
for my $what (@ELEMENT_KINDS) {
    my $sole = $stats{"ok_$what"} + $stats{"unknown_$what"};
    printf "<td>%d</td>\n", $sole;
    printf "<td>%.2f%%</td>\n", percent($sole, $stats{"all_$what"});
}
print "<td></td>\n";
print "</tr>\n";
print "</table>\n";

print "<p>Statistics created by Stephan Knauss</p>\n";
print "</body></html>\n";

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Download both acceptance lists unless both cached copies exist and are
# younger than $MAX_CACHE_AGE seconds. Exits with status 1 when a download
# fails. A missing cache file counts as stale (first run).
sub refresh_user_lists {
    my @files = ($AGREED_FILE, $DISAGREED_FILE);

    my $needs_download = 0;
    for my $file (@files) {
        my $info = stat($file);    # File::stat object, undef on failure
        if (!$info || $info->mtime < time - $MAX_CACHE_AGE) {
            $needs_download = 1;
        }
    }

    if (!$needs_download) {
        print STDERR "Acceptance data still current, using cached copy.\n";
        return;
    }

    print STDERR "downloading user acceptance data from server...\n";
    for my $file (@files) {
        my $res = getstore("$LIST_BASE_URL/$file", $file);
        if (is_error($res)) {
            print STDERR "failed downloading $file: "
                . status_message($res) . "\n";
            exit 1;
        }
    }
    print STDERR "... done.\n";
    return;
}

# Read a list file and return a hash reference whose keys are the uids found.
# Only lines consisting of a single integer are used; anything else is
# skipped. Dies when the file cannot be opened.
sub load_uid_list {
    my ($file) = @_;

    open my $fh, '<', $file or die "cannot open $file: $!\n";
    my %uids;
    while (my $line = <$fh>) {
        next unless $line =~ /^\s*(\d+)\s*$/;
        $uids{$1} = 1;
    }
    close $fh;

    return \%uids;
}

# Return the modification time (epoch seconds) of $file; dies if the file
# cannot be stat'ed.
sub file_mtime {
    my ($file) = @_;
    my $info = stat($file) or die "cannot stat $file: $!\n";
    return $info->mtime;
}

# Return $part as a percentage of $total, or 0 when $total is zero/undef.
sub percent {
    my ($part, $total) = @_;
    return 0 unless $total;
    return ($part / $total) * 100;
}

# Format a timestamp string as YYYY-MM-DD (UTC). Returns 'unknown' for an
# empty/zero timestamp or one that str2time cannot parse.
sub format_day {
    my ($timestamp) = @_;
    my $epoch = $timestamp ? str2time($timestamp) : undef;
    return 'unknown' unless defined $epoch;
    return strftime("%Y-%m-%d", gmtime($epoch));
}

# Print the summary paragraph for one element kind.
#   $label - plural shown in the heading ("nodes", "ways", "relations")
#   $what  - key suffix used in %stats ("node", "way", "relation")
sub print_element_summary {
    my ($label, $what) = @_;
    my $total = $stats{"all_$what"};

    my $format = join '',
        "Total %s in DB: %d<br>\n",
        "- of this safe for ODbL: %d (%.2f%%)<br>\n",
        "- of this possibly ODbL: %d (%.2f%%)<br>\n",
        "- of this possible loss: %d (%.2f%%)<br>\n",
        "- of this definitive loss: %d (%.2f%%)<br>\n",
        "<br>\n";

    printf $format, $label, $total,
        map { ($stats{"${_}_$what"}, percent($stats{"${_}_$what"}, $total)) }
            qw(ok partial unknown loss);
    return;
}

# Build the two table cells (count, percentage of $total) for one counter.
# A missing or zero count yields two blank cells.
sub _count_cells {
    my ($num, $total) = @_;
    return "<td>&nbsp;</td><td>&nbsp;</td>" unless $num;
    return sprintf "<td>%d</td><td>%.2f%%</td>", $num, percent($num, $total);
}

# Count/percentage cells relative to all nodes, ways or relations.
sub node_perc { my $num = shift; return _count_cells($num, $stats{all_node}); }
sub way_perc  { my $num = shift; return _count_cells($num, $stats{all_way}); }
sub rel_perc  { my $num = shift; return _count_cells($num, $stats{all_relation}); }