-
Notifications
You must be signed in to change notification settings - Fork 0
/
paper.tex
1061 lines (941 loc) · 53 KB
/
paper.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[12pt]{article}
\usepackage{tikz}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{bm}
\usepackage{tcolorbox}
\tcbuselibrary{skins}
\usepackage{lipsum}
\usepackage{listings}
%%
%% Julia definition (c) 2014 Jubobs
%%
\lstdefinelanguage{Julia}%
{morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,%
end,export,false,for,function,immutable,import,importall,if,in,%
macro,module,otherwise,quote,return,switch,true,try,type,typealias,%
using,while},%
sensitive=true,%
alsoother={\$},%
morecomment=[l]\#,%
morecomment=[n]{\#=}{=\#},%
morestring=[s]{"}{"},%
morestring=[m]{'}{'},%
}[keywords,comments,strings]%
\lstset{%
language = Julia,
basicstyle = \ttfamily,
keywordstyle = \bfseries\color{blue},
stringstyle = \color{magenta},
commentstyle = \color{ForestGreen},
showstringspaces = false,
}
\usepackage[linesnumbered]{algorithm2e}
\makeatletter
\renewcommand{\@algocf@capt@plain}{above}
\makeatother
\usetikzlibrary{decorations.pathreplacing}
\usetikzlibrary{arrows}
\usetikzlibrary{snakes}
\usetikzlibrary{calc}
\let\emptyset\varnothing
\definecolor{hotpink}{rgb}{0.9,0,0.5}
\definecolor{deepgray}{gray}{0.35}
\definecolor{deepgray2}{gray}{0.25}
\definecolor{deepgray3}{gray}{0.10}
\definecolor{lightgray}{gray}{0.95}
\definecolor{lightgray2}{gray}{0.85}
\definecolor{purple1}{RGB}{239,229,244}
\definecolor{purple2}{RGB}{216,191,216}
\definecolor{lightblue}{rgb}{0.73,0.33,0.83}
\definecolor{lightpurple}{rgb}{.8,.2,.8}
\definecolor{textcolor}{rgb}{0,0,5}
\definecolor{blue1}{RGB}{187,217,238}
\definecolor{blue2}{RGB}{235,244,250}
\definecolor{yellow1}{RGB}{255,255,102}
\definecolor{blue3}{RGB}{63,40,96}
\definecolor{red1}{RGB}{255, 102, 102}
\definecolor{green1}{RGB}{102, 255, 102}
\newtheorem{lemma}{Lemma}
\newtheorem{theorem}{Theorem}
\newtheorem{definition}{Definition}
\theoremstyle{definition}
\def\proof{\par{\bf Proof}. \ignorespaces}
\def\qedsymbol{\vbox{\hrule\hbox{%
\vrule height1.3ex\hskip0.8ex\vrule}\hrule}}
\def\endproof{\qquad\qedsymbol\medskip\par}
% \usepackage{booktabs} % For formal tables
% \usepackage{cite}
\usepackage{url}
% \newtheorem{definition}{Definition}[section]
% \newtheorem{theorem}{Theorem}[section]
% \usepackage{algorithmic}
% \usepackage[cmex10]{amsmath}
\usepackage{amssymb}
\title{A Closer Look at Network Flow on Evolving Graphs}
\author{Jiahao Chen \thanks{
Capital One,
New York, USA.
\texttt{[email protected]}
}
and
Weijian Zhang
\thanks{%
School of Mathematics,
The University of Manchester,
Manchester, M13 9PL, England.
\texttt{[email protected]}.
}
}
\def\R{\mathbb{R}}
\def\C{\mathbb{C}}
\def\nbyn{n \times n}
\def\mbyn{m \times n}
\def\l{\lambda}
\def\norm#1{\|#1\|}
\def\normi#1{\|#1\|_1}
\def\normo#1{\|#1\|_{\infty}}
\def\Chat{\widehat{C}}
\def\e{eigenvalue}
% \DeclareMathOperator{\diag}{diag} % Requires amsmath.
\def\diag{\mathop{\mathrm{diag}}} % If not using amsmath.
\def\trace{\mathop{\mathrm{trace}}} % If not using amsmath.
\def\At{\widetilde{A}}
\def\normt#1{\|#1\|_2}
% Note: this clashes with amsmath.
\def\bmatrix#1{\left[\matrix{#1}\right]}
\newenvironment{njh}{\begin{quote}\small\sf{\color{blue}$\diamondsuit$~NJH~}}{\end{quote}}
\newenvironment{wz}{\begin{quote}\small\sf{\color{green}$\diamondsuit$~WZ~}}{\end{quote}}
\begin{document}
%\subtitlenote{The full version of the author's guide is available as
% \texttt{acmart.pdf} document}
\maketitle
\begin{abstract}
This paper studies the influence of the temporal information in an evolving graph on the node centrality scores.
In particular, we look at four important centrality measures, namely Katz centrality,
Resolvent Betweenness centrality, Closeness centrality, and PageRank.
We derive evolving graph centrality measures from the perspective of network flow and
observe that the amount of temporal distance between nodes has an important impact on the final evolving graph centrality scores, especially on the top ranked nodes.
Build on this idea, we introduce a generalised PageRank on evolving graph that is based on two ideas:
reverse network flow and block adjacency matrix.
We conduct our experiments on random evolving graphs to illustrate our idea.
\end{abstract}
\section{Introduction}
\label{sec:introduction}
In the 1985 science-fiction film ``Back to the Future'', Marty McFly (played by Michael J. Fox) travelled back in time and accidentally changed history.
However, we cannot travel back in time or even send a message to the past.
To traverse an evolving graph (a time-dependent graph), one has to consider the order of time stamps. In particular, a walk on an evolving graph needs to be time-preserving, meaning we
can not pass a massage (through an edge) to a node at a earlier time.
Time-preversing walks and paths are very important concepts for
network flow on evolving graphs.
% time-respecting walk
We represent an evolving graph as a time-ordered sequences of graphs, similar to the work of Tang and coworkers \cite{ntmm13, tmml09, tmml10, tsmm09} and Grindrod, Higham and coworkers \cite{grihig13,gphe11}. The idea is to divide the system's time span into temporal slices and regard it as a sequence of static graphs $G^{[t]}$, one for each layer. Such a representation arises naturally in many real-world applications. For example,
consider networks of users interacting through messaging. Each interaction between two users $A$ and $B$ is stored with a time stamp $t$. If we represent user $A$ as a node then user $A$ at time stamp $t$ is represented by a pair $(A,t)$ and a message sent from user $A$ to user $B$ at time stamp $t$ can be represented as $\langle (A, t), (B, t) \rangle$.
In general, a node $v$ on $G^{[t]}$ is represented by a pair $(v, t)$, where $t$ is the time stamp of the graph $G^{[t]}$.
A sequence of interactions between users form a walk. Suppose $A$ sent a message to $B$ at time stamp $t_1$
and then $B$ sent this message to $C$ at time stamp $t_2$. We can represent this activity as $\langle (A,t_1), (B,t_1), (B, t_2), (C,t_2)\rangle$, where each pair of two nodes has meanings: $\langle (A,t_1), (B,t_1) \rangle$ and $\langle (B,t_2), (C,t_2) \rangle$ represent interactions between users $A$, $B$, and $C$ and $\langle (B,t_1), (B,t_2) \rangle$ represents the fact that user $B$ holds the message at time stamps $t_1$ and $t_2$.
A time-respecting walk is a sequence of nodes $\langle (v_1, t_1), (v_2, t_2), \ldots ,(v_m, t_m)\rangle$, where $t_1 \le t_2 \le \cdots \le t_n$ are the corresponding time stamps. If $t_i = t_j$, then $\langle (v_i, t_i), (v_j, t_j) \rangle$ is an edge where both nodes are on the same temporal slice, otherwise it is an edge linking nodes from different temporal slices.
We call edges of the first kind \emph{static edges} and of the second kind \emph{causal edges}.
Recall that in static graph, a walk of length $m$ is defined as a sequence of nodes $v_1, v_2, \ldots, v_{m+1}$ such that for each $i = 1, \ldots, m$ there is an edge from $v_i$ to $v_{i+1}$. How can we determine the length of a time-respecting walk?
One could define the length of a time-respecting walk as the total number of temporal slices (see the shortest temporal distance in \cite{tmml09}) or the number of static edges (see the dynamic walk in \cite{gphe11}). This paper studies the impact of these two different definitions. In particular, we study a general definition of temporal distance that takes account of both temporal slices and static edges.
% Related works
Researchers have generalised many static graph centrality algorithms for evolving graphs. One approach is to use the fact that the elements of the matrix product of adjacency matrices at different time stamps can correctly count the number of temporal paths.
However, not every centrality algorithm can be generalised in this way. Another approach is to define evolving graph centrality algorithms using time-respecting paths. This approach is applicable to algorithms that are defined using shortest paths.
% our contribution this paper
Borgatti \cite{borgatti05} noted that the manner of traffic flow entails a new way to think about centrality. In particular, the importance of a node in a network cannot be determined without reference to how traffic flows through the network.
Inspired by this idea, we simulate network flows in evolving graphs to measure the centrality scores.
% Grindrod et al.'s definition of dynamic walk \cite{gphe11} only counts static edges.
% Tang et al. \cite{tang10s} measure a temporal shortest walk in time unit (or causal edges) only.
% If a node can be reached by another node in $k$ time stamps then the distance between the two nodes is $k$. Chen and Zhang \cite{chen16} considered both static edges and causal edges in their definition of temporal walk.
The aims of this paper are as follows. First, to derive important evolving graph centrality from first principles via network flow. Second, to study the impact of temporal distance between nodes on centrality.
Third, to generalise PageRank on evolving graphs.
We carry out the experiment using the Julia dynamic network analysis package
EvolvingGraphs.jl (\url{https://github.com/EtymoIO/EvolvingGraphs.jl}).
We use the research paper data gathered from Etymo (\url{https://etymo.io}), a visual search engine for data scientists.
\section{Motivation: A Closer Look at Katz Centrality on Evolving Graphs}
\label{sec:motivation}
We let $G_n = \langle G^{[1]}, G^{[2]}, \ldots, G^{[n]} \rangle$ be an evolving graph
and $A^{[1]}, A^{[2]}, \dots A^{[n]}$ be the corresponding adjacency matrices.
Then the dynamic communicability matrix is defined as
\begin{equation}
\label{eq:q}
Q^{[k]} = (I - \alpha A^{[1]})^{-1}(I - \alpha A^{[2]})^{-1} \cdots (I - \alpha A^{[n]})^{-1},
\end{equation}
where the parameter $\alpha$ satisfies $\alpha < 1/ \max_k \rho(A^{[k]})$, the spectral radius of $A^{[k]}$.
The $i$th row sum of $Q^{[k]}$ is the broadcast centrality.
Suppose $A$, $B$, and $C$ are three co-workers in a large internet company. Figure \ref{fig:katz_eg}
shows how they exchange ideas during three days at work. The directions of the arrows indicate the direction of the flow of ideas.
For example, $A$ shares a new idea with $B$ on day $1$ and $B$ shares a new idea with $C$ on dat $3$. As a result, $A$'s idea can influence $C$. Here, and in subsequent figures, green filled circles represent active node and red circles represent inactive nodes.
\begin{figure}[h]
\begin{center}
\begin{tikzpicture}[scale=.38, line width =1.5pt]
\node[circle,fill=green1, minimum size=0.2cm] (n7) at (-5,7) {B};
\node[circle,draw=red1, minimum size=0.2cm] (n8) at (-10,5) {C};
\node[circle,fill=green1, minimum size=0.2cm] (n10) at (-8,10) {A};
\node[circle,fill=green1, minimum size=0.2cm] (n6) at (3,7) {B};
\node[circle,fill=green1, minimum size=0.2cm] (n4) at (-2,5) {C};
\node[circle,fill=green1, minimum size=0.2cm] (n1) at (0,10) {A};
\node[circle,fill=green1, minimum size=0.2cm] (n11) at (11,7) {B};
\node[circle,fill=green1, minimum size=0.2cm] (n12) at (6,5) {C};
\node[circle,draw=red1, minimum size=0.2cm] (n14) at (8,10) {A};
\foreach \from/\to in {n10/n7, n1/n4, n4/n1, n11/n12, n1/n6}
\draw[every edge,->] (\from) -- (\to);
%\draw[dotted,->](n4) -- (n12);
% \draw[dotted,->](n7) to[out=80, in=40](n11);
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(-4,3) -- (-10.5,3) node [midway,yshift=-0.6cm]{ $1$};
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(4,3) -- (-2,3) node [midway,yshift=-0.6cm]{$2$};
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(11.5,3) -- (5.5,3) node [midway,yshift=-0.6cm]{ $3$};
\end{tikzpicture}
\end{center}
\caption[An evolving directed graph with 3 time stamps $1$, $2$ and $3$.]{An evolving directed graph with 3 time stamps $1$, $2$ and $3$.
At each time stamp, the evolving graph is represented as a graph.
The green filled circles represent active nodes while the red circles represent
inactive nodes. Directed edges in each time stamp are shown as black arrows.}
\label{fig:katz_eg}
\end{figure}
We could use EvolvingGraphs.jl to generate the above evolving graph and compute the Katz centrality.
\begin{lstlisting}
using EvolvingGraphs
using EvolvingGraphs.Centrality
g = EvolvingGraph{Node{String}, Int}()
add_bunch_of_edges!(g, [("A", "B", 1), ("A", "C", 2),
("A", "B", 2),("C", "A", 2),("B", "C", 3)])
\end{lstlisting}
The centrality values for each node are
\begin{lstlisting}
katz(g)
3-element Array{Tuple{EvolvingGraphs.Node{String},Float64},1}:
(Node(A), 0.742301)
(Node(B), 0.42943)
(Node(C), 0.514373)
\end{lstlisting}
As expected $A$ is the most important node in the network. $C$ is the second most important node in the network as it influenced
$A$ at time stamp $2$. If we reverse the time of communication between day $1$ and $3$, i.e., $B$ shares a new idea with $C$ on day $1$ and $A$ shares a new idea with $B$ on day $3$. Then in this case the idea $A$ had on day $3$ cannot pass to $C$. This is illustrated in Figure \ref{fig:katz_eg2}.
\begin{figure}[h]
\begin{center}
\begin{tikzpicture}[scale=.38, line width =1.5pt]
\node[circle,fill=green1, minimum size=0.2cm] (n7) at (-5,7) {B};
\node[circle,fill=green1, minimum size=0.2cm] (n8) at (-10,5) {C};
\node[circle,draw=red1, minimum size=0.2cm] (n10) at (-8,10) {A};
\node[circle,fill=green1, minimum size=0.2cm] (n6) at (3,7) {B};
\node[circle,fill=green1, minimum size=0.2cm] (n4) at (-2,5) {C};
\node[circle,fill=green1, minimum size=0.2cm] (n1) at (0,10) {A};
\node[circle,fill=green1, minimum size=0.2cm] (n11) at (11,7) {B};
\node[circle,draw=red1, minimum size=0.2cm] (n12) at (6,5) {C};
\node[circle,fill=green1, minimum size=0.2cm] (n14) at (8,10) {A};
\foreach \from/\to in {n7/n8, n1/n4, n4/n1, n1/n6, n14/n11}
\draw[every edge,->] (\from) -- (\to);
%\draw[dotted,->](n4) -- (n12);
% \draw[dotted,->](n7) to[out=80, in=40](n11);
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(-4,3) -- (-10.5,3) node [midway,yshift=-0.6cm]{ $1$};
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(4,3) -- (-2,3) node [midway,yshift=-0.6cm]{ $2$};
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(11.5,3) -- (5.5,3) node [midway,yshift=-0.6cm]{ $3$};
\end{tikzpicture}
\end{center}
\caption{An evolving directed graph with 3 time stamps $1$, $2$ and $3$.}
\label{fig:katz_eg2}
\end{figure}
In this case, we have
\begin{lstlisting}
g2 = EvolvingGraph{Node{String}, Int}()
add_bunch_of_edges!(g2, [("A", "B", 3), ("A", "C", 2),
("A", "B", 2),("C", "A", 2),("B", "C", 1)])
\end{lstlisting}
The centrality ratings of the three nodes are
\begin{lstlisting}
katz(g2)
3-element Array{Tuple{EvolvingGraphs.Node{String},Float64},1}:
(Node(A), 0.687679)
(Node(B), 0.490062)
(Node(C), 0.535666)
\end{lstlisting}
Notice the rating of $A$ decreased as its influence is not as important as in Figure \ref{fig:katz_eg}.
% does not take account of walks in a long time. cost in time
If the communication between $A$, $B$, and $C$ are not on three consecutive days
but on day $1$, day $10$, and day $100$, as shown in Figure \ref{fig:katz_eg3},
we would expect the centrality scores to change. However, according to \eqref{eq:q}, the scores are unchanged.
We notice that the generalised Katz centrality on evolving graphs only takes account of the edges at each time stamp but not the communication time between edges at different time stamps.
\begin{figure}[h]
\begin{center}
\begin{tikzpicture}[scale=.38, line width =1.5pt]
\node[circle,fill=green1, minimum size=0.2cm] (n7) at (-5,7) {B};
\node[circle,fill=green1, minimum size=0.2cm] (n8) at (-10,5) {C};
\node[circle,draw=red1, minimum size=0.2cm] (n10) at (-8,10) {A};
\node[circle,fill=green1, minimum size=0.2cm] (n6) at (3,7) {B};
\node[circle,fill=green1, minimum size=0.2cm] (n4) at (-2,5) {C};
\node[circle,fill=green1, minimum size=0.2cm] (n1) at (0,10) {A};
\node[circle,fill=green1, minimum size=0.2cm] (n11) at (11,7) {B};
\node[circle,draw=red1, minimum size=0.2cm] (n12) at (6,5) {C};
\node[circle,fill=green1, minimum size=0.2cm] (n14) at (8,10) {A};
\foreach \from/\to in {n7/n8, n1/n4, n4/n1, n1/n6, n14/n11}
\draw[every edge,->] (\from) -- (\to);
%\draw[dotted,->](n4) -- (n12);
% \draw[dotted,->](n7) to[out=80, in=40](n11);
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(-4,3) -- (-10.5,3) node [midway,yshift=-0.6cm]{ $1$};
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(4,3) -- (-2,3) node [midway,yshift=-0.6cm]{ $10$};
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(11.5,3) -- (5.5,3) node [midway,yshift=-0.6cm]{ $100$};
\end{tikzpicture}
\end{center}
\caption[An evolving directed graph with 3 time stamps $1$, $10$ and $100$.]{An evolving directed graph with 3 time stamps $1$, $10$ and $100$.
At each time stamp, the evolving graph is represented as a graph.
The green filled circles represent active nodes while the red circles represent
inactive nodes. Directed edges in each time stamp are shown as black arrows.}
\label{fig:katz_eg3}
\end{figure}
\section{Time-Preserving Walks and Paths}
\label{sec:time-pres-paths}
% what is walk and what is path?
A walk of length $\ell$ is a sequence of nodes $v_1, v_2, \ldots, v_l, v_{l+1}$ such that for
each $i = 1, 2, \ldots, \ell$, there is an edge from $v_i$ to $v_{i+1}$. A path of length $\ell$ is a walk of length $\ell$
such that all the nodes are different.
% what is time-preserving walks
Similarly, we define a temporal walk to be a time-ordered sequence of temporal nodes. A temporal path is a temporal walk such that all the temporal nodes are different.
% relate to centrality
The definition of centrality depends on the manner in which traffic flows through a network \cite{borgatti05}.
Time-preserving walks induce temporal Katz centrality and temporal closeness centrality while time-preserving paths induce betweenness centrality and closeness centrality.
We now describe evolving graph network flow in more details. We follow the definition of
\emph{evolving graph} in \cite{chen16}.
\begin{definition}
An \textbf{evolving graph} $G_n$ is a sequence of (static) graphs
$G_n = \langle G^{[1]}, G^{[2]}, \ldots ,G^{[n]} \rangle$ with associated time stamps
$t_1 \leq t_2 \leq \ldots \leq t_n$.
Each $G^{[t]} = (V^{[t]}, E^{[t]})$ represents a (static) graph labelled by a time t.
\end{definition}
Note that the node sets $V^{[t]}$ can change over time, i.e., nodes may appear or disappear at a particular time stamp.
For example, in Figure \ref{fig:eg_shortest_walk}, at time stamp $1998$, $V^{[1998]} = \langle 1, 2 \rangle$ and $E^{[1998]} = \langle (1,2) \rangle$. Each graph $G^{[t]}$ can be represented by its adjacency matrix $A^{[t]}$.
We can represent $G_n$ by a list of adjacency matrices $A_n = \langle A^{[1]}, A^{[2]}, \ldots, A^{[n]} \rangle$.
If we only measure the temporal difference in a temporal walk, we have a new definition of Katz centrality.
In the matrix form, the nonzero entries of $A^{[t_i]} A^{[t_{i+1}]}$ counts all the temporal walks of length $1$, and
$A^{[t_i]}\cdots A^{[t_j]}$, where $i < j$ counts all the temporal walks of length $j -i$.
For example, $A^{[1]}A^{[4]}$, $A^{[1]}A^{[1]}A^{[4]}$ and $A^{[1]}A^{[2]}A^{[3]}A^{[4]}$ count all walks of length 3.
In the original Katz centrality, the `attenuation' factor $\alpha$ is imposed on the
number of paths, i.e., longer paths has smaller effect to the overall rating.
Hence the final result is the summation of all products of the form
\[
\alpha^k A^{[i]} \cdots A^{[i+k]}
\]
Note this is different from \eqref{eq:q} as discussed in Section \ref{sec:temp-katz-centr}.
% Another definition of temporal walk was introduced by \cite{chen16}. Here
% both static edges and causal edges are considered. We can represent an evolving graph by a block adjacency matrix, as shown in Section \ref{sec:centr-block-adjac}. Centrality on this block matrix gives the importance of each node at all time stamps. We can average these rating at different time stamps to
% get the final rating.
\begin{figure}[h]
\begin{center}
\begin{tikzpicture}[scale=.38, line width =1.5pt]
\node[circle,fill=green1, minimum size=0.2cm] (n7) at (-5,7) {2};
\node[circle,draw=red1, minimum size=0.2cm] (n8) at (-10,5) {3};
\node[circle,fill=green1, minimum size=0.2cm] (n10) at (-8,10) {1};
\node[circle,draw=red1, minimum size=0.2cm] (n6) at (3,7) {2};
\node[circle,fill=green1, minimum size=0.2cm] (n4) at (-2,5) {3};
\node[circle,fill=green1, minimum size=0.2cm] (n1) at (0,10) {1};
\node[circle,fill=green1, minimum size=0.2cm] (n11) at (11,7) {2};
\node[circle,fill=green1, minimum size=0.2cm] (n12) at (6,5) {3};
\node[circle,draw=red1, minimum size=0.2cm] (n14) at (8,10) {1};
\node (w1) at (-6, 9) {$w_1$};
\node (w2) at (3, 13) {$w_2$};
\node (w3) at (-2, 8) {$w_3$};
\node (w4) at (2, 4) {$w_4$};
\node (w5) at (8.5, 7) {$w_5$};s
\foreach \from/\to in {n10/n7, n1/n4, n11/n12}
\draw[every edge,->] (\from) -- (\to);
\draw[dotted,->](n4) -- (n12);
\draw[dotted,->](n7) to[out=80, in=40](n11);
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(-4,3) -- (-10.5,3) node [midway,yshift=-0.6cm]{ $1998$};
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(4,3) -- (-2,3) node [midway,yshift=-0.6cm]{ $1999$};
\draw [decorate,decoration={brace,amplitude=5pt},xshift=-4pt,yshift=0pt]
(11.5,3) -- (5.5,3) node [midway,yshift=-0.6cm]{ $2018$};
\end{tikzpicture}
\end{center}
\caption[An evolving directed graph with 3 time stamps $1998$, $1999$ and $2018$.]{An evolving directed graph with 3 time stamps $1998$, $1999$ and $2018$.
At each time stamp, the evolving graph is represented as a graph. Directed edges in each time stamp are shown as black arrows and directed edges between graphs are shown as dotted arrows, where $w_1, w_2 \ldots w_5$ are edge weights.}
\label{fig:eg_shortest_walk}
\end{figure}
\begin{definition}
A \textbf{temporal node} is a pair (v, t), where $v \in V^{[t]}$ is a node at a time $t$.
\end{definition}
%At each time stamp, we only count temporal nodes that connects by an edge.
\begin{definition}
A temporal node $(v, t)$ is an \textbf{active node} if there exists at least one edge $e \in E^{[t]}$ that connects $v \in V^{[t]}$ to another node $w \in V^{[t]}, w\ne v$. Otherwise it is called an inactive node.
\end{definition}
In Figure \ref{fig:eg_shortest_walk}, the filled green circles are active nodes. For the adjacency matrix representation, if a node $i$ is active at time stamp $t$ then
the $i$th row or the $i$th column of $A^{[t]}$ has at least one none-zero entry.
\begin{definition}
A \textbf{static edge} $\langle (v_i, t), (v_j, t)\rangle$ is a pair of elements of $V^{[t]}$, i.e., $v_i \in V^{[t]}$ and
$v_j \in V^{[t]}$.
A \textbf{causal edge} $\langle (v, t_i), (v, t_j)\rangle$ is a pair of the same node at different time stamps.
\end{definition}
For example, in Figure \ref{fig:eg_shortest_walk}
$\langle (1, 1998), (2, 1998) \rangle$ is a static edge at time stamp $1998$ and $\langle (3, 1999), (3, 2018) \rangle$ is a causal edge between time stamp $1999$ and time stamp $2018$.
The difference between static edges and causal edges was first explicitly considered in \cite{chen16}.
We can denote a \emph{weighted static edge} as $\langle (v_i, t), (v_j, t), w_s \rangle$ and a \emph{weighted causal edge} as $\langle (v, t_i), (v, t_j), w_t \rangle$.
Here $w_s$ represents the spatial distance between the two nodes $v_i$ and $v_j$; $w_t$ represents the
temporal distance of $v$ at different time stamps.
In Figure \ref{fig:eg_shortest_walk}, the black arrows represent static edges and the dotted arrows represent causal edges.
Suppose we assign all the static edge weights to $1$ and
let the causal edge weight be the number of time stamps between the pair of nodes. Then in Figure \ref{fig:eg_shortest_walk}
the edge weight between $(1, 1998)$ and $(2, 1998)$ is $1$ and the edge weight between $(2, 1998)$ and $(2, 2018)$ is $10$.
In general, a weighted time-preserving walk can be defined as follows.
\begin{definition}
A \textbf{weighted time-preserving walk} of length $m$ on an evolving graph $G_n$ is an alternating
time-ordered sequence of active nodes and edges, $\langle (v_1, t_1), e_1, (v_2, t_2), e_2, (v_3, t_3) \ldots, e_m, (v_m, t_m) \rangle$, where $t_1 \le t_2 \le \cdots \le t_m$ and
$v_i = v_j$ if and only if $t_i \ne t_j$, and $e_i$ can be either a weighted static edge or weighted causal edge.
\end{definition}
It is natural to define the length of a walk on a static graph as the number of nodes travelled through the walk. For the weighted graph case, the length of a walk is the total weight of all the edges in the walk. We observe that static edges link nodes in space while causal edges link nodes in time. Hence we would like to distinguish temporal distance from spatial distance in an evolving graph.
For two linked temporal nodes, we set the length of a static edge between them to be one and the length of a causal edge to be the temporal distance between them.
The following Julia function measures the temporal distance between two active nodes in an evolving graph.
\begin{lstlisting}
function temporal_distance(v1::TimeNode,
v2::TimeNode, beta::Real = 1.)
beta* abs(node_timestamp(v1) - node_timestamp(v2))
end
\end{lstlisting}
This function defines the temporal distance to be the difference between their time stamps scaled by a hyper-parameter
\texttt{beta}.
In the simplest case, the time-respecting walk
$\langle (1, 1998) ,(2, 1998) , (2, 2018), (3, 2018)\rangle$ in Figure \ref{fig:eg_shortest_walk} has length $12$ in total because two static edges $\langle (1, 1998) (2, 1998) \rangle$ and $\langle (2, 2018), (3, 2018) \rangle$ each have length one, and $\langle (2, 1998) (2, 2018) \rangle$
traverse $10$ time stamps and thuse has length $10$.
A time-preserving walk (and path) can traverse both causal edges and static edges.
\section{Centrality Measures}
\label{sec:topol-temp-flow}
% what is centrality?
Centrality measures the importance of nodes within a graph.
% current evolving graph centrality approach
To generalize centrality measures for evolving graphs there are two common approaches.
First, we could exploit the fact that matrix product of adjacency matrices at different time stamps counts the number of dynamic walks in an evolving graph. Using this idea, we can generalize walk-based centrality measures such as
the Katz centrality \cite{estrada09} and communicability betweenness \cite{alsayed15}.
Second, we could generalize the definition of shortest paths as shortest temporal paths and replace shortest paths in centrality measures with shortest temporal paths where possible. This way we can define temporal betweenness centrality and temporal closeness centrality \cite{ntmm13}.
% our approach
We observe both approaches ignore the communication time between edges at different time stamps, i.e., the time gap between different (static) graph slices. We take account of this communication time by deriving centrality measures from the original theses of the ideas and show communication time has important impact on the final node ratings (and rankings).
We note that our goal is not to provide efficient centrality algorithms, which is out of the scope of this paper.
\subsection{Temporal Katz Centrality}
\label{sec:temp-katz-centr}
% katz centrality from first principle
Katz centrality accounts the influence of nearest-neighbours to a give node and the influence of other nodes
separated at a certain distance from it. The $k$th power of the adjacency matrix $A$ of a graph $G$ accounts for walk of length $k$. The expansion
\begin{equation}
\label{eq:a}
A^0 + \alpha A + \alpha^2 A^2 + \cdots
\end{equation}
converges to $(I - \alpha A)^{-1}$ when $\alpha < 1/\rho(A)$. The $(i,j)$ entry of \eqref{eq:a}
counts all walks from $i$ and $j$ with the influence of walks of length $k$ scaled by a factor of $\alpha^k$.
The Katz centrality of node $i$ is the $i$th row sum of $(I - \alpha A)^{-1}$, which is the sum of influence of all the nodes that node $i$ can reach via a walk.
In other words, we start with node $i$ and each out-neighbour of node $i$ has influence $\alpha$ on node $i$ and the out-neighbour of out-neighbour of node $i$ has influence $\alpha^2$ on node $i$, and so on.
For evolving graphs, we replace out-neighbours with forward neighbours which preserve the direction of time.
Then the Katz score of a node $i$ at time stamp $t$, denoted by $(i,t)$, is the sum of influence of all the active nodes that $(i,t)$ can reach via a time-preserving walk.
Unlike \cite{gphe11}, we also consider the temporal distance between two nodes. For example, the temporal distance between $(i, 2001)$ and $(i, 2018)$ is $17$.
% julia code implementation
With the definition of \texttt{temporal\_distance} introduced in Section \ref{sec:time-pres-paths}.
We define the temporal Katz score of node $(i, t)$ by the algorithm given in Listing \ref{lst:katz}.
\begin{lstlisting}[caption={Temporal Katz Centrality of Single Node},label={lst:katz}, captionpos=b]
function temporal_katz(g::AbstractEvolvingGraph,
start::TimeNode; alpha = 0.2, max_level = 10)
score = 0.
v = start
fronter = [v]
level = 0
while level < max_level
next = []
for u in fronter
for v in forward_neighbors(g, u)
push!(next, v)
if node_key(v) != node_key(u)
td = temporal_distance(start, u)
d = td + level
score += alpha^d
end
end
end
fronter = next
level += 1
end
return score / num_edges(g)
end
\end{lstlisting}
% explain the code
The algorithm \texttt{temporal\_katz} performs a breadth first search (BFS) on an evolving graph $g$ that
starts at \texttt{TimeNode} $v$, which represents a node at a specific time stamp.
% \footnote{See also \url{https://etymoio.github.io/EvolvingGraphs.jl/latest/base.html#EvolvingGraphs.TimeNode}}.
For each \texttt{TimeNode} $v$, we accumulate the influence of linked nodes according to their spatial and temporal
distance from $v$. The spatial distance is recorded in variable \texttt{level} and the temporal distance is calculated by
\texttt{temporal\_distance}. The algorithm stops searching when \texttt{level} is larger or equal to \texttt{max\_level}.
We set \texttt{alpha} to be $0.2$ and \texttt{max\_level} to be $10$. Then
% example on 3 timestamp evolving graph
for Figure \ref{fig:katz_eg2}, we find the rating of each active node as shown below.
\begin{lstlisting}
TimeNode(A, 2) => 0.466667
TimeNode(A, 3) => 0.2
TimeNode(B, 2) => 0.0
TimeNode(B, 3) => 0.0
TimeNode(B, 1) => 0.202347
TimeNode(C, 1) => 0.0117333
TimeNode(C, 2) => 0.293333
\end{lstlisting}
Here, for example, \texttt{TimeNode(A,2)} represents active node $(A,2)$.
The overall score of a node is the sum of scores of the node at different time stamps. We use the overall scores to measure the overall importance of nodes in an evolving graph.
Here are the overall scores of the three nodes.
\begin{lstlisting}
"A" => 0.666667
"B" => 0.202347
"C" => 0.305067
\end{lstlisting}
Notice that $A$ is the most important node in the evolving graph and $C$ is the second most important node.
Thus the ranking agrees with the Katz centrality rankings in Section \ref{sec:motivation}.
For Figure \ref{fig:katz_eg3}, we have
\begin{lstlisting}
TimeNode(A, 10) => 0.458333
TimeNode(A, 100) => 0.2
TimeNode(B, 1) => 0.2
TimeNode(B, 10) => 0.0
TimeNode(B, 100) => 0.0
TimeNode(C, 1) => 2.98666e-8
TimeNode(C, 10) => 0.291667
\end{lstlisting}
The sum of node scores at each time stamp is
\begin{lstlisting}
"A" => 0.658333
"B" => 0.2
"C" => 0.291667
\end{lstlisting}
We see the scores of all three nodes are decreased when it takes longer to communicate between different time stamps. In this case, the overall ranking is not affected but in general the communication time between time stamps can change the overall ranking.
\subsubsection{Temporal Resolvent Betweenness Centrality}
Betweenness centrality measures the importance of a node as the ability to facilitate the communication among other nodes in the network. Commonly it is defined based on the shortest paths. However, key messages do not necessarily follow the shortest paths and thus it makes sense to consider walks instead of shortest paths.
Recall that the $(i,j)$ entry of the resolvent matrix $(I - \alpha A)^{-1}$ provides information about the communication from $i$ to $j$.
By considering the difference in rating with and without edges linking to node $v$, we could define the resolvent betweenness for node $v$ as
\begin{equation}
\sum_{i}\sum_j \frac{(I-\alpha A)^{-1}_{i,j} - (I - \alpha (A - E_v))^{-1}_{i,j}}{(I - \alpha A)^{-1}_{i,j}}, \quad i \ne j, i \ne v, j \ne v.
\end{equation}
where $E_v$ has non-zeros only in row and column $v$, and in the $v$th row and column has $1$ where $A$ has $1$.
The matrix $A - E_i$ represents a graph with all edges involving the node $v$ removed.
For $(I-\alpha A)^{-1}_{i,j}$ we could simply modify Listing \ref{lst:katz} so that it only accumulates scores when the end node is $j$. For the $(I - \alpha (A-E_v))^{-1}_{i,j}$ part, we can apply the same algorithm on a graph with node $v$ and related edges removed.
Therefore the general temporal resolvent betweenness centrality can be defined by the algorithm in Listing \ref{lst:resolvent}.
\begin{lstlisting}[caption={Temporal Resolvent Betweenness of Single Node},label={lst:resolvent}, captionpos=b]
function temporal_resolvent_betweenness(g1, g2,
v::TimeNode; alpha = 0.2, k = 10)
r = 0.
ns = active_nodes(g1)
for i in ns
for j in ns
if i != j && i != v && j != v
score1 = temporal_katz(g1, i, j,
alpha = alpha, k = k)
score2 = temporal_katz(g2, i, j,
alpha = alpha, k = k)
r += score1 == 0.0 ? 0 :
(score1 - score2)/score1
end
end
end
return r
end
\end{lstlisting}
Here \texttt{temporal\_katz} only takes account of walks from node $i$ to $j$. Evolving graph
\texttt{g1} is the original evolving graph and evolving graph \texttt{g2} is \texttt{g1} with all the edges
related to \texttt{v} removed.
For example, by removing node $(A, 2)$, i.e., node $A$ at time stamp $2$, from Figure \ref{fig:katz_eg2} we find
the temporal resolvent betweenness score of node $A$ is $5.0$. While the temporal resolvent betweenness score of node $(B,3)$ is $4.0$. This shows that $(A,2)$ is more important than $(B,3)$.
We note that temporal communicability betweenness centrality can be analysed similarly.
\subsection{Temporal Closeness Centrality}
\label{sec:temp-betw-centr}
% closeness centrality
Let $d_{ij}$ be the length of the shortest path from $i$ and $j$ in a static graph. The closeness centrality of a node $i$ is a measure of importance calculated by considering all the shortest paths between node $i$ and all other nodes in the graph, that is
$$
C_i^{closeness} = \frac{N-1}{\sum_j d_{ij}},
$$
where $N-1$ is the total number of reachable nodes.
The more important a node is, the closer it is to all other nodes.
For evolving graphs, we need to consider the shortest temporal path from $(i,t_i)$ to $(j,t_j)$, for any time stamp $t_i \leq t_j$.
Depending on how we define the length of shortest temporal path, the closeness centrality can counts static edges, causal edges, or both.
The key difference between Katz centrality and closeness centrality is the fact here we only consider shortest temporal path, not every temporal walk.
We could apply BFS to find the shortest temporal path from $(v,t)$ to any other nodes in the evolving graphs. The algorithm is shown as the algorithm given in Listing
\ref{lst:closeness}.
\begin{lstlisting}[caption={Temporal Closeness of Single Node},label={lst:closeness}, captionpos=b]
function temporal_closeness(g::AbstractEvolvingGraph,
start::TimeNode)
v = start
level = Dict(v => 0)
i = 1
fronter = [v]
while length(fronter) > 0
next = []
for u in fronter
for v in forward_neighbors(g, u)
if !(v in keys(level))
if node_key(u) != node_key(v)
td = temporal_distance(start, u)
level[v] = i + td
else
level[v] = i - 1
end
push!(next, v)
end
end
end
fronter = next
i += 1
end
total_scores = sum(values(level))
return total_scores > 0. ?
(length(level) - 1)/total_scores : 0.
end
\end{lstlisting}
The algorithm starts at node \texttt{start} and explores the forward neighbours and store all the
reached nodes so far in \texttt{level}. If \texttt{node\_key(u) == node\_key(v)}, we traverse the same node at different time stamps. In this case, we consider that we've already reached it at the previous time stamp.
For Figure \ref{fig:katz_eg2} the scores are
\begin{lstlisting}
TimeNode(A, 2) => 0.666667
TimeNode(A, 3) => 1.0
TimeNode(B, 1) => 0.346154
TimeNode(B, 2) => 0.0
TimeNode(B, 3) => 0.0
TimeNode(C, 1) => 0.315789
TimeNode(C, 2) => 0.5
\end{lstlisting}
and the sum of scores at different time stamps gives
\begin{lstlisting}
"A" => 1.66667
"B" => 0.346154
"C" => 0.815789
\end{lstlisting}
Note that the ranking is the same as the temporal Katz centrality case.
For Figure \ref{fig:katz_eg3}, the scores are
\begin{lstlisting}
TimeNode(A, 10) => 0.0707071
TimeNode(A, 100) => 1.0
TimeNode(B, 1) => 0.0597015
TimeNode(B, 10) => 0.0
TimeNode(B, 100) => 0.0
TimeNode(C, 1) => 0.0390625
TimeNode(C, 10) => 0.0412371
\end{lstlisting}
and the sums are
\begin{lstlisting}
"A" => 1.07071
"B" => 0.0597015
"C" => 0.0802996
\end{lstlisting}
Similar to the temporal Katz centrality case, we see a decrease of centrality scores
for all the nodes in the evolving graph.
We note that the analysis for temporal betweenness centrality is similar to above.
\subsection{Temporal PageRank}
We see from our analysis for temporal Katz centrality and temporal betweenness centrality
that the temporal distance between nodes in an evolving graph can impact the final centrality scores.
We would like to design a temporal PageRank algorithm that takes account of
the temporal distance and the spatial distance. The generalisation is based on two ideas:
reversed temporal walks and block adjacency matrix.
\subsubsection{Reversed Temporal Walks}
We can not travel back in time. But reversing the direction of temporal flows can help
identify the ``hub'' nodes or the source of the flows. Recall in the HITS algorithm \cite{kleinberg99},
the hub score describes the quality of a web page as a link collection of important related pages.
In reverse PageRank we compute PageRank on the graph with reversed direction, i.e., reverse the direction of each edge $(i,j)$ to $(j,i)$. Fogaras \cite{fogaras03, gleich15} shows that Reversed Page Rank scores express hub quality.
Recall a time-preserving walk is a time-ordered sequence of active nodes. We define a \emph{reversed temporal walk} to be a reversed time-ordered sequence of active nodes
$\langle (v_1, t_1), (v_2,t_2), \ldots, (v_m, t_m)\rangle$, where
$t_1 \ge t_2 \ge \cdots \ge t_m$.
\subsubsection{Block Adjacency Matrices}
\label{sec:centr-block-adjac}
Aggregating edges at each time stamp to form
a simple static graph can provide misleading information about the structure of the network.
In \cite{chen16}, we derive a block adjacency matrix representation of an evolving graph and
show it is possible to represent an evolvig graph as a block adjacency matrix.
Let $E'$ be the set of causal edges and $\hat E$ be the set of static edges. Then
an evolving graph can be represented as
$$
\bm M_n =
\begin{pmatrix}
A^{[t_1]} & M^{[t_1, t_2]} & \ldots & M^{[t_1, t_n]} \\
0 & A^{[t_2]} & \ldots & M^{[t_2, t_n]} \\
& \ldots & & \\
0 & 0 & \ldots & A^{[t_n]}
\end{pmatrix},
$$
where $M^{[t_i, t_j]}$ is the matrix whose rows are labeled by $V^{[t_i]}$ and columns are labeled by $V^{[t_j]}$, and whose entries are
$$
M_{uv}^{[t_i, t_j]} =
\begin{cases}
1 & \mbox{if} (u, v) \in E' \\
0 & \mbox{otherwise}.
\end{cases}
$$
The adjacency matrix blocks $A^{[t]}$ encode the static edge set $\hat E$, whereas the off-diagonal blocks $M^{[t_i, t_j]}$ encode the causal edge set $E'$. To take account of the temporal distance between nodes, we could let
$$
M_{uv}^{[t_i, t_j]} =
\begin{cases}
\frac{1}{|t_j - t_i|} & \mbox{if} (u, v) \in E' \\
0 & \mbox{otherwise}.
\end{cases}
$$
For Figure \ref{fig:eg_shortest_walk}, we have
\begin{align*}
V & = \{(1,\!1998), (2,\!1998), (1,\!1999), (3,\!1999), (2,\!2018), (3,\!2018)\},\\
\tilde E &= \{((1,\!1998), (2,\!1998)), ((1,\!1999), (3,\!1999)), ((2,\!2018), (3,\!2018))\},\\
E' &= \{((1,\!1998), (1,\!1999)), ((2,\!1999), (2,\!2018)), ((3,\!1999), (3,\!2018))\}
\end{align*}
and corresponding block adjacency matrix is
\[
\bm M_3 = \begin{pmatrix}
0 & 1 & 1 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 1/9 & 0 \\
0 & 0 & 0 & 1 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 1/9 \\
0 & 0 & 0 & 0 & 0 & 1 \\
0 & 0 & 0 & 0 & 0 & 0
\end{pmatrix}.
\]
The static graph representation of Figure \ref{fig:eg_shortest_walk} is shown in Figure \ref{fig:static}.
\begin{figure}[h]
\begin{center}
\begin{tikzpicture}[scale=.36, line width =1.4pt]
\node[circle,fill=green1, minimum size=0.1cm, inner sep = 0pt] (n7) at (-5,7)
{\scriptsize $(2,t_1)$};
\node[circle,draw=red1, minimum size=0.2cm, inner sep = 0pt] (n8) at (-10,5)
{\scriptsize $(3,t_1)$};
\node[circle,fill=green1, minimum size=0.1cm, inner sep = 0pt] (n10) at (-8,10) {\scriptsize $(1,t_1)$};
\node[circle,draw=red1, minimum size=0.2cm, inner sep = 0pt] (n6) at (3,7)
{\scriptsize $(2,t_2)$};
\node[circle,fill=green1, minimum size=0.2cm, inner sep = 0pt] (n4) at (-2,5)
{\scriptsize $(3,t_2)$};
\node[circle,fill=green1, minimum size=0.2cm, inner sep = 0pt] (n1) at (0,10)
{\scriptsize $(1,t_2)$};
\node[circle,fill=green1, minimum size=0.2cm, inner sep = 0pt] (n11) at (11,7)
{\scriptsize $(2,t_3)$};
\node[circle,fill=green1, minimum size=0.2cm, inner sep = 0pt] (n12) at (6,5)
{\scriptsize $(3,t_3)$};
\node[circle,draw=red1, minimum size=0.2cm, inner sep = 0pt] (n14) at (8,10)
{\scriptsize $(1,t_3)$};
\foreach \from/\to in {n10/n7, n1/n4, n11/n12}
\draw[every edge,->] (\from) -- (\to);
\draw[dotted,->](n7) to[out=80, in=40] (n11);
\draw[dotted,->](n10) to[out=10, in=180] (n1);
\draw[dotted,->](n4) to[out=-30,in=-150] (n12);
\end{tikzpicture}
\end{center}
\caption[A static graph corresponding to the evolving graph example of
Figure~\ref{fig:eg_shortest_walk}.]{A static graph corresponding to the evolving graph example of
Figure~\ref{fig:eg_shortest_walk}.
The black lines are edges in the static edge set $\tilde E$ and are encoded
algebraically in the diagonal blocks $A^{[t]}$ of the adjacency matrix $\bm M_3$.
The dotted lines are edges in the causal edge set $E'$ and are encoded algebraically in
the off-diagonal blocks $M^{[t_i, t_j]}$.
The graph containing all the edges and temporal nodes has adjacency matrix $\bm M_3$.}
\label{fig:static}
\end{figure}
Notice that the lower triangular blocks of matrix $\bm M_n$ are zero.
This is because all the causal edges are time preserving. We consider the transpose of matrix $\bm M_n$, which reverse the direction of static edges and causal edges.
Let $b \in [0,1]$ be a balance scalar. Then the block matrix
$$
b \bm M_n + (1-b) \bm M_n^T
$$
takes account of both the temporal walks and reversed temporal walks.
We now derive the block Google matrix as
\begin{lstlisting}
function block_google_matrix(g; alpha = 0.85, balance = 0.75)
M = full(block_adjacency_matrix(g))
M = balance * M + (1-balance) * M'
N, N = size(M)
p = ones(N)/N
dangling_weights = p
dangling_nodes = find(x->x==0, sum(M,2))
for node in dangling_nodes
M[node,:] = dangling_weights
end
M ./= sum(M,2)
return alpha * M .+ (1- alpha) * p
end
\end{lstlisting}
The function \texttt{block\_adjacency\_matrix} converts an evolving graph $g$ to a block adjacency matrix $\bm M$. Every row sum of the block Google matrix is equal to one. Each element of the dominant eigenvector of the block Google matrix
represents the PageRank rating of a node.
The rank of each node can be computed iteratively from the block Google matrix using the power method.
For Figure \ref{fig:katz_eg2}, we let $alpha$ be $0.85$ and $balance$ be $1.0$.
% power method
Then the PageRank scores are
\begin{lstlisting}
TimeNode(A, 2) => 0.289123
TimeNode(A, 3) => 0.128054
TimeNode(B, 1) => 0.182477
TimeNode(B, 2) => 0.354335
TimeNode(B, 3) => 0.480941
TimeNode(C, 1) => 0.128054
TimeNode(C, 2) => 0.250931
\end{lstlisting}
and the sum of node scores at all three time stamps are
\begin{lstlisting}
"A" => 0.417177
"B" => 1.01775
"C" => 0.378986
\end{lstlisting}
We see node $B$ is the most important node in an evolving graph.
On the other extreme, if we set $balance$ to be $0.0$, we get the overall scores
\begin{lstlisting}
"A" => 0.678502
"B" => 0.647353
"C" => 0.527927
\end{lstlisting}
Thus $A$ is the most important node in this case.
One potential problem with temporal PageRank is that the computational cost is expensive for large evolving graphs with many time stamps. The dimension of the block matrix
is $|V| \times |T|$, where $|V|$ represents the number of nodes and $|T|$ represents the number of time stamps.
\section{Experiments}
\label{sec:experiments}
We conduct the following experiments using graph type \texttt{IntAdjacencyList} from EvolvingGraphs.jl, which represents an evolving graph $g$ with integer nodes and integer time stamps as adjacency lists. In particular, each active node in $g$ stores its forward neighbours in a list. We count both static edges and causal edges.
The following experiments study the influence of temporal distance on centrality scores. All experiments are conducted on a MacOS system with $8$ GB of RAM running at $2.3$ GHz clock speed.
\subsection{Random Evolving Graphs}
\label{sec:random}
% need to compute it for all 500 nodes and check the changes.
% maybe run it on AWS.
We generate a random evolving graph $g$ with $500$ nodes and $499183$ static edges and $10$ time stamps.
We focus on studying the changes of rating of the following nodes.
The second element in each tuple is the Katz Centrality rating computed using \eqref{eq:q}.
\begin{lstlisting}
(459, 0.126296)
(147, 0.116541)
(326, 0.113667)
(183, 0.108705)
(469, 0.0936847)
(296, 0.0925748)
(424, 0.0921004)
(377, 0.091973)
(313, 0.0910352)
(127, 0.089123)
\end{lstlisting}
% Instead of running temporal Katz centrality for each active node in the evolving graph $g$,
% we only consider these $10$ most important nodes because in practice we usually are just interested in the rankings and ratings of the most important nodes.
For each node in the above list,
we compute the node score at each time stamp and the overall score.
Here we set $alpha = 0.5$ and $max\_level = 3$. The results are shown in Table \ref{table:ranking} and Table \ref{table:ranking2}.
Table \ref{table:ranking} shows the ratings where the causal edges
all have edge weight zero. Table \ref{table:ranking2} shows the ratings where the weights of causal edges are their temporal differences.
For each table we notice that ratings (and rankings) at the first time stamp $t_1$ are very different from the overall ratings (and rankings).
Comparing the two tables, we observe that the Katz ratings in general are smaller when we take account of the temporal information. The rankings, however, are stable. For rankings at time stamp $1$, only the top two nodes $459$ and $469$ change positions. The overall rankings stays unchanged.
\begin{table}[h]
\begin{center}
\begin{tabular}{ c | c | c | c }
Ranking at $t_1$ & Rating at $t_1$ & Overall ranking & Overall rating \\ \hline
469 & $0.7193$ & 183 & $5.975$ \\
459 & $0.7180$ & 469 & $5.932$ \\
424 & $0.7108$ & 326 & $5.804$ \\
296 & $0.7042$ & 313 & $5.797$ \\
326 & $0.6853$ & 459 & $5.788$ \\
183 & $0.6647$ & 147 & $5.782$ \\
377 & $0.6622$ & 377 & $5.691$ \\
127 & $0.6316$ & 296 & $5.681$ \\
147 & $0.6303$ & 424 & $5.636$ \\
313 & $0.5715$ & 127 & $5.613$ \\
\end{tabular}
\end{center}
\caption{Temporal Katz centrality ranking and rating on evolving graph $g$.
We set the weight of all causal edges to zero.}
\label{table:ranking}
\end{table}
\begin{table}[h]
\begin{center}
\begin{tabular}{ c | c | c | c }
Ranking at $t_1$ & Rating at $t_1$ & Overall ranking & Overall rating \\ \hline
\textbf{459} & $0.6290$ & 183 & $5.576$ \\
\textbf{469} & $0.6281$ & 469 & $5.533$ \\
424 & $0.6236$ & 326 & $5.414$ \\
296 & $0.6167$ & 313 & $5.409$ \\
326 & $0.5983$ & 459 & $5.401$ \\
183 & $0.5778$ & 147 & $5.396$ \\
377 & $0.5775$ & 377 & $5.311$ \\
127 & $0.5492$ & 296 & $5.299$ \\
147 & $0.5471$ & 424 & $5.258$ \\
313 & $0.4916$ & 127 & $5.237$ \\
\end{tabular}
\end{center}
\caption{Temporal Katz centrality ranking and rating on evolving graph $g$.
We set the weight of a causal edge between two active nodes to be their temporal difference.
We use bold font to highlight the differences in ranking from Table \ref{table:ranking}.}
\label{table:ranking2}
\end{table}
For the same random evolving graph $g$, we also compute temporal closeness centrality scores.
The results are shown in Table \ref{table:ranking3} and Table \ref{table:ranking4}.
As before, for both tables the ratings (and rankings) at the first
time stamp $t_1$ are very different from the overall ratings (and rankings).
However, when we compare the two tables Table \ref{table:ranking3}
and \ref{table:ranking4}, we notice that
for temporal closeness centrality most of rankings are changed when we take account of the temporal information.
\begin{table}[h]
\begin{center}
\begin{tabular}{ c | c | c | c }
Ranking at $t_1$ & Rating at $t_1$ & Overall ranking & Overall rating \\ \hline
296 & $0.4938$ & 469 & $5.040$ \\
469 & $0.4933$ & 183 & $5.030$ \\
459 & $0.4915$ & 313 & $5.016$ \\
424 & $0.4888$ & 326 & $5.009$ \\
326 & $0.4879$ & 147 & $5.008$ \\
377 & $0.4872$ & 459 & $5.003$ \\
183 & $0.4858$ & 296 & $4.002$ \\
127 & $0.4831$ & 377 & $4.996$ \\
147 & $0.4790$ & 127 & $4.989$ \\
313 & $0.4773$ & 424 & $4.986$ \\
\end{tabular}
\end{center}
\caption{Temporal closeness centrality ranking and rating of evolving graph $g$.
We set the weight of all causal edges to zero.}
\label{table:ranking3}
\end{table}
\begin{table}[h]
\begin{center}
\begin{tabular}{ c | c | c | c }
Ranking at $t_1$ & Rating at $t_1$ & Overall ranking & Overall rating \\ \hline